In [16]:
from utils import *

In [17]:
path = 'data/mnist/'
# path = 'data/mnist/sample' # sample path

In [18]:
train = pd.read_csv(path + 'train.csv')
test = pd.read_csv(path + 'test.csv')

In [19]:
train = train.values
test = test.values
# 打乱
train = np.random.permutation(train)

In [20]:
valid = train[:2000, :]
valid_data = valid[:, 1:]
valid_label = onehot(valid[:, 0])

In [6]:
train_data = reshape(train_data, [40000, 1, 28, 28])
valid_data = reshape(valid_data, [2000, 1, 28, 28])
train_data.shape, train_label.shape, valid_label.shape, valid_data.shape


Out[6]:
((40000, 1, 28, 28), (40000, 10), (2000, 10), (2000, 1, 28, 28))

In [21]:
train = train[2000:, :]

In [22]:
train_data = train[:, 1:]
train_label = onehot(train[:, 0])

In [23]:
train_data.shape, train_label.shape, valid_label.shape, valid_data.shape


Out[23]:
((40000, 784), (40000, 10), (2000, 10), (2000, 784))

In [24]:
train_data = reshape(train_data, [40000, 1, 28, 28])
valid_data = reshape(valid_data, [2000, 1, 28, 28])
train_data.shape, train_label.shape, valid_label.shape, valid_data.shape


Out[24]:
((40000, 1, 28, 28), (40000, 10), (2000, 10), (2000, 1, 28, 28))

In [25]:
mean_px = train_data.mean().astype(np.float32)
std_px = train_data.std().astype(np.float32)

In [26]:
def norm_input(x): return (x-mean_px)/std_px

In [27]:
BATCH_SIZE = 20

Linear model


In [35]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [36]:
lm = get_lin_model()

In [37]:
gen = image.ImageDataGenerator()
batches = gen.flow(train_data, train_label, batch_size=BATCH_SIZE)
val_batches = gen.flow(valid_data, valid_label, batch_size=BATCH_SIZE)

In [38]:
lm.fit_generator(
    batches, 
    samples_per_epoch=len(train_data) / 20, 
    nb_epoch=1, 
    validation_data=val_batches,
    nb_val_samples=len(valid_data) / 20
)


Epoch 1/1
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1038     try:
-> 1039       return fn(*args)
   1040     except errors.OpError as e:

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1020                                  feed_dict, fetch_list, target_list,
-> 1021                                  status, run_metadata)
   1022 

~/anaconda3/envs/py3Tfgpu/lib/python3.6/contextlib.py in __exit__(self, type, value, traceback)
     87             try:
---> 88                 next(self.gen)
     89             except StopIteration:

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    465           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466           pywrap_tensorflow.TF_GetCode(status))
    467   finally:

InvalidArgumentError: size 1 must be non-negative, not -67403719
	 [[Node: Reshape_2 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_14, stack_2)]]
	 [[Node: mul_30/_101 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_396_mul_30", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-38-584f0c1a3dd3> in <module>()
      4     nb_epoch=1,
      5     validation_data=val_batches,
----> 6     nb_val_samples=len(valid_data) / 20
      7 )

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch, **kwargs)
    933                                         nb_worker=nb_worker,
    934                                         pickle_safe=pickle_safe,
--> 935                                         initial_epoch=initial_epoch)
    936 
    937     def evaluate_generator(self, generator, val_samples,

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
   1555                     outs = self.train_on_batch(x, y,
   1556                                                sample_weight=sample_weight,
-> 1557                                                class_weight=class_weight)
   1558 
   1559                     if not isinstance(outs, list):

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
   1318             ins = x + y + sample_weights
   1319         self._make_train_function()
-> 1320         outputs = self.train_function(ins)
   1321         if len(outputs) == 1:
   1322             return outputs[0]

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   1941         session = get_session()
   1942         updated = session.run(self.outputs + [self.updates_op],
-> 1943                               feed_dict=feed_dict)
   1944         return updated[:len(self.outputs)]
   1945 

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    776     try:
    777       result = self._run(None, fetches, feed_dict, options_ptr,
--> 778                          run_metadata_ptr)
    779       if run_metadata:
    780         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    980     if final_fetches or final_targets:
    981       results = self._do_run(handle, final_targets, final_fetches,
--> 982                              feed_dict_string, options, run_metadata)
    983     else:
    984       results = []

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1030     if handle is None:
   1031       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032                            target_list, options, run_metadata)
   1033     else:
   1034       return self._do_call(_prun_fn, self._session, handle, feed_dict,

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1050         except KeyError:
   1051           pass
-> 1052       raise type(e)(node_def, op, message)
   1053 
   1054   def _extend_graph(self):

InvalidArgumentError: size 1 must be non-negative, not -67403719
	 [[Node: Reshape_2 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_14, stack_2)]]
	 [[Node: mul_30/_101 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_396_mul_30", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'Reshape_2', defined at:
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-36-691a46c2e116>", line 1, in <module>
    lm = get_lin_model()
  File "<ipython-input-35-356c06aa12d4>", line 5, in get_lin_model
    Dense(10, activation='softmax')
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py", line 273, in __init__
    self.add(layer)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py", line 332, in add
    output_tensor = layer(self.outputs[0])
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 572, in __call__
    self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 635, in add_inbound_node
    Node.create_node(self, inbound_layers, node_indices, tensor_indices)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 166, in create_node
    output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/layers/core.py", line 483, in call
    return K.batch_flatten(x)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1629, in batch_flatten
    x = tf.reshape(x, stack([-1, prod(shape(x)[1:])]))
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2510, in reshape
    name=name)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): size 1 must be non-negative, not -67403719
	 [[Node: Reshape_2 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_14, stack_2)]]
	 [[Node: mul_30/_101 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_396_mul_30", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

In [167]:
lm.optimizer.lr=0.1

In [169]:
lm.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 5s - loss: 0.2770 - acc: 0.9225 - val_loss: 0.2734 - val_acc: 0.9252
Out[169]:
<keras.callbacks.History at 0x7f3782f7b710>

In [172]:
lm.optimizer.lr=0.01

In [173]:
lm.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 5s - loss: 0.2710 - acc: 0.9241 - val_loss: 0.2858 - val_acc: 0.9216
Epoch 2/4
60000/60000 [==============================] - 5s - loss: 0.2667 - acc: 0.9249 - val_loss: 0.2764 - val_acc: 0.9242
Epoch 3/4
60000/60000 [==============================] - 4s - loss: 0.2707 - acc: 0.9249 - val_loss: 0.2759 - val_acc: 0.9219
Epoch 4/4
60000/60000 [==============================] - 4s - loss: 0.2603 - acc: 0.9267 - val_loss: 0.2810 - val_acc: 0.9240
Out[173]:
<keras.callbacks.History at 0x7f3782f7b950>

Single dense layer


In [16]:
def get_fc_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [17]:
fc = get_fc_model()

In [21]:
len(X_train)


Out[21]:
40000

In [18]:
BATCH_SIZE = 20

In [19]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=BATCH_SIZE)
test_batches = gen.flow(X_test, y_test, batch_size=BATCH_SIZE)

In [20]:
fc.fit_generator(
    batches, 
    samples_per_epoch=len(X_train) / BATCH_SIZE, 
    nb_epoch=1, 
    validation_data=test_batches,
    nb_val_samples=len(X_test) / BATCH_SIZE
)


Epoch 1/1
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1038     try:
-> 1039       return fn(*args)
   1040     except errors.OpError as e:

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1020                                  feed_dict, fetch_list, target_list,
-> 1021                                  status, run_metadata)
   1022 

~/anaconda3/envs/py3Tfgpu/lib/python3.6/contextlib.py in __exit__(self, type, value, traceback)
     87             try:
---> 88                 next(self.gen)
     89             except StopIteration:

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    465           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466           pywrap_tensorflow.TF_GetCode(status))
    467   finally:

InvalidArgumentError: Input to reshape is a tensor with 15680 values, but the requested shape requires a multiple of 1125580800
	 [[Node: Reshape_1 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_9, stack_1)]]
	 [[Node: mul_26/_67 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_603_mul_26", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-20-14b034845a59> in <module>()
      4     nb_epoch=1,
      5     validation_data=test_batches,
----> 6     nb_val_samples=len(X_test) / BATCH_SIZE
      7 )

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch, **kwargs)
    933                                         nb_worker=nb_worker,
    934                                         pickle_safe=pickle_safe,
--> 935                                         initial_epoch=initial_epoch)
    936 
    937     def evaluate_generator(self, generator, val_samples,

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
   1555                     outs = self.train_on_batch(x, y,
   1556                                                sample_weight=sample_weight,
-> 1557                                                class_weight=class_weight)
   1558 
   1559                     if not isinstance(outs, list):

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
   1318             ins = x + y + sample_weights
   1319         self._make_train_function()
-> 1320         outputs = self.train_function(ins)
   1321         if len(outputs) == 1:
   1322             return outputs[0]

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   1941         session = get_session()
   1942         updated = session.run(self.outputs + [self.updates_op],
-> 1943                               feed_dict=feed_dict)
   1944         return updated[:len(self.outputs)]
   1945 

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    776     try:
    777       result = self._run(None, fetches, feed_dict, options_ptr,
--> 778                          run_metadata_ptr)
    779       if run_metadata:
    780         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    980     if final_fetches or final_targets:
    981       results = self._do_run(handle, final_targets, final_fetches,
--> 982                              feed_dict_string, options, run_metadata)
    983     else:
    984       results = []

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1030     if handle is None:
   1031       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032                            target_list, options, run_metadata)
   1033     else:
   1034       return self._do_call(_prun_fn, self._session, handle, feed_dict,

~/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1050         except KeyError:
   1051           pass
-> 1052       raise type(e)(node_def, op, message)
   1053 
   1054   def _extend_graph(self):

InvalidArgumentError: Input to reshape is a tensor with 15680 values, but the requested shape requires a multiple of 1125580800
	 [[Node: Reshape_1 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_9, stack_1)]]
	 [[Node: mul_26/_67 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_603_mul_26", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'Reshape_1', defined at:
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-7575368da131>", line 1, in <module>
    fc = get_fc_model()
  File "<ipython-input-16-bb7eefdd112f>", line 6, in get_fc_model
    Dense(10, activation='softmax')
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py", line 273, in __init__
    self.add(layer)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/models.py", line 332, in add
    output_tensor = layer(self.outputs[0])
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 572, in __call__
    self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 635, in add_inbound_node
    Node.create_node(self, inbound_layers, node_indices, tensor_indices)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/engine/topology.py", line 166, in create_node
    output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/layers/core.py", line 483, in call
    return K.batch_flatten(x)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1629, in batch_flatten
    x = tf.reshape(x, stack([-1, prod(shape(x)[1:])]))
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2510, in reshape
    name=name)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/quoniammm/anaconda3/envs/py3Tfgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 15680 values, but the requested shape requires a multiple of 1125580800
	 [[Node: Reshape_1 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"](truediv_9, stack_1)]]
	 [[Node: mul_26/_67 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_603_mul_26", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

In [184]:
fc.optimizer.lr=0.1

In [185]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 5s - loss: 0.7502 - acc: 0.9241 - val_loss: 0.5514 - val_acc: 0.9290
Epoch 2/4
60000/60000 [==============================] - 5s - loss: 0.4507 - acc: 0.9338 - val_loss: 0.3896 - val_acc: 0.9321
Epoch 3/4
60000/60000 [==============================] - 5s - loss: 0.3507 - acc: 0.9357 - val_loss: 0.3417 - val_acc: 0.9306
Epoch 4/4
60000/60000 [==============================] - 5s - loss: 0.3069 - acc: 0.9374 - val_loss: 0.3091 - val_acc: 0.9325
Out[185]:
<keras.callbacks.History at 0x7f377d1c6210>

In [187]:
fc.optimizer.lr=0.01

In [189]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 5s - loss: 0.2549 - acc: 0.9431 - val_loss: 0.2797 - val_acc: 0.9341
Epoch 2/4
60000/60000 [==============================] - 5s - loss: 0.2408 - acc: 0.9457 - val_loss: 0.2753 - val_acc: 0.9341
Epoch 3/4
60000/60000 [==============================] - 5s - loss: 0.2358 - acc: 0.9453 - val_loss: 0.2733 - val_acc: 0.9339
Epoch 4/4
60000/60000 [==============================] - 5s - loss: 0.2252 - acc: 0.9474 - val_loss: 0.2670 - val_acc: 0.9397
Out[189]:
<keras.callbacks.History at 0x7f377d1c6850>

Basic 'VGG-style' CNN


In [14]:
def get_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        Convolution2D(64,3,3, activation='relu'),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [45]:
model = get_model()

In [36]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 6s - loss: 0.1097 - acc: 0.9664 - val_loss: 0.0396 - val_acc: 0.9863
Out[36]:
<keras.callbacks.History at 0x7f380c53ffd0>

In [37]:
model.optimizer.lr=0.1

In [38]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 7s - loss: 0.0353 - acc: 0.9889 - val_loss: 0.0291 - val_acc: 0.9902
Out[38]:
<keras.callbacks.History at 0x7f3807ebbe10>

In [39]:
model.optimizer.lr=0.01

In [40]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/8
60000/60000 [==============================] - 6s - loss: 0.0232 - acc: 0.9929 - val_loss: 0.0207 - val_acc: 0.9935
Epoch 2/8
60000/60000 [==============================] - 6s - loss: 0.0193 - acc: 0.9935 - val_loss: 0.0252 - val_acc: 0.9919
Epoch 3/8
60000/60000 [==============================] - 6s - loss: 0.0155 - acc: 0.9949 - val_loss: 0.0298 - val_acc: 0.9919
Epoch 4/8
60000/60000 [==============================] - 6s - loss: 0.0133 - acc: 0.9958 - val_loss: 0.0313 - val_acc: 0.9913
Epoch 5/8
60000/60000 [==============================] - 6s - loss: 0.0095 - acc: 0.9970 - val_loss: 0.0327 - val_acc: 0.9913
Epoch 6/8
60000/60000 [==============================] - 6s - loss: 0.0107 - acc: 0.9966 - val_loss: 0.0301 - val_acc: 0.9906
Epoch 7/8
60000/60000 [==============================] - 7s - loss: 0.0070 - acc: 0.9979 - val_loss: 0.0269 - val_acc: 0.9938
Epoch 8/8
60000/60000 [==============================] - 6s - loss: 0.0082 - acc: 0.9975 - val_loss: 0.0261 - val_acc: 0.9926
Out[40]:
<keras.callbacks.History at 0x7f3807ebbc90>

Data augmentation


In [23]:
model = get_model()

In [76]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [24]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 7s - loss: 0.2064 - acc: 0.9360 - val_loss: 0.0643 - val_acc: 0.9778
Out[24]:
<keras.callbacks.History at 0x7fa800c8d710>

In [25]:
model.optimizer.lr=0.1

In [26]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 7s - loss: 0.0706 - acc: 0.9787 - val_loss: 0.0496 - val_acc: 0.9844
Epoch 2/4
60000/60000 [==============================] - 7s - loss: 0.0531 - acc: 0.9838 - val_loss: 0.0395 - val_acc: 0.9873
Epoch 3/4
60000/60000 [==============================] - 7s - loss: 0.0473 - acc: 0.9856 - val_loss: 0.0329 - val_acc: 0.9886
Epoch 4/4
60000/60000 [==============================] - 7s - loss: 0.0402 - acc: 0.9870 - val_loss: 0.0381 - val_acc: 0.9878
Out[26]:
<keras.callbacks.History at 0x7fa8003268d0>

In [27]:
model.optimizer.lr=0.01

In [28]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/8
60000/60000 [==============================] - 7s - loss: 0.0381 - acc: 0.9887 - val_loss: 0.0295 - val_acc: 0.9908
Epoch 2/8
60000/60000 [==============================] - 7s - loss: 0.0340 - acc: 0.9893 - val_loss: 0.0266 - val_acc: 0.9918
Epoch 3/8
60000/60000 [==============================] - 7s - loss: 0.0318 - acc: 0.9903 - val_loss: 0.0400 - val_acc: 0.9877
Epoch 4/8
60000/60000 [==============================] - 7s - loss: 0.0322 - acc: 0.9899 - val_loss: 0.0264 - val_acc: 0.9922
Epoch 5/8
60000/60000 [==============================] - 7s - loss: 0.0281 - acc: 0.9910 - val_loss: 0.0266 - val_acc: 0.9911
Epoch 6/8
60000/60000 [==============================] - 7s - loss: 0.0283 - acc: 0.9909 - val_loss: 0.0238 - val_acc: 0.9922
Epoch 7/8
60000/60000 [==============================] - 7s - loss: 0.0277 - acc: 0.9917 - val_loss: 0.0314 - val_acc: 0.9911
Epoch 8/8
60000/60000 [==============================] - 6s - loss: 0.0251 - acc: 0.9925 - val_loss: 0.0287 - val_acc: 0.9921
Out[28]:
<keras.callbacks.History at 0x7fa800326790>

In [29]:
model.optimizer.lr=0.001

In [30]:
model.fit_generator(batches, batches.N, nb_epoch=14, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 6s - loss: 0.0242 - acc: 0.9920 - val_loss: 0.0271 - val_acc: 0.9912
Epoch 2/12
60000/60000 [==============================] - 7s - loss: 0.0250 - acc: 0.9922 - val_loss: 0.0351 - val_acc: 0.9894
Epoch 3/12
60000/60000 [==============================] - 7s - loss: 0.0225 - acc: 0.9931 - val_loss: 0.0323 - val_acc: 0.9905
Epoch 4/12
60000/60000 [==============================] - 7s - loss: 0.0223 - acc: 0.9932 - val_loss: 0.0235 - val_acc: 0.9927
Epoch 5/12
60000/60000 [==============================] - 7s - loss: 0.0236 - acc: 0.9926 - val_loss: 0.0216 - val_acc: 0.9937
Epoch 6/12
60000/60000 [==============================] - 6s - loss: 0.0220 - acc: 0.9933 - val_loss: 0.0259 - val_acc: 0.9918
Epoch 7/12
60000/60000 [==============================] - 7s - loss: 0.0207 - acc: 0.9936 - val_loss: 0.0298 - val_acc: 0.9899
Epoch 8/12
60000/60000 [==============================] - 7s - loss: 0.0216 - acc: 0.9932 - val_loss: 0.0268 - val_acc: 0.9929
Epoch 9/12
60000/60000 [==============================] - 7s - loss: 0.0206 - acc: 0.9936 - val_loss: 0.0282 - val_acc: 0.9913
Epoch 10/12
60000/60000 [==============================] - 7s - loss: 0.0194 - acc: 0.9940 - val_loss: 0.0296 - val_acc: 0.9927
Epoch 11/12
60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9940 - val_loss: 0.0193 - val_acc: 0.9941
Epoch 12/12
60000/60000 [==============================] - 7s - loss: 0.0187 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9914
Out[30]:
<keras.callbacks.History at 0x7fa800326ad0>

In [31]:
model.optimizer.lr=0.0001

In [32]:
model.fit_generator(batches, batches.N, nb_epoch=10, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/10
60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9942 - val_loss: 0.0277 - val_acc: 0.9906
Epoch 2/10
60000/60000 [==============================] - 7s - loss: 0.0196 - acc: 0.9938 - val_loss: 0.0192 - val_acc: 0.9945
Epoch 3/10
60000/60000 [==============================] - 6s - loss: 0.0173 - acc: 0.9946 - val_loss: 0.0258 - val_acc: 0.9924
Epoch 4/10
60000/60000 [==============================] - 7s - loss: 0.0189 - acc: 0.9943 - val_loss: 0.0249 - val_acc: 0.9924
Epoch 5/10
60000/60000 [==============================] - 7s - loss: 0.0166 - acc: 0.9951 - val_loss: 0.0271 - val_acc: 0.9920
Epoch 6/10
60000/60000 [==============================] - 7s - loss: 0.0183 - acc: 0.9942 - val_loss: 0.0229 - val_acc: 0.9937
Epoch 7/10
60000/60000 [==============================] - 7s - loss: 0.0177 - acc: 0.9944 - val_loss: 0.0275 - val_acc: 0.9924
Epoch 8/10
60000/60000 [==============================] - 6s - loss: 0.0168 - acc: 0.9946 - val_loss: 0.0246 - val_acc: 0.9926
Epoch 9/10
60000/60000 [==============================] - 7s - loss: 0.0169 - acc: 0.9943 - val_loss: 0.0215 - val_acc: 0.9936
Epoch 10/10
60000/60000 [==============================] - 7s - loss: 0.0160 - acc: 0.9953 - val_loss: 0.0267 - val_acc: 0.9919
Out[32]:
<keras.callbacks.History at 0x7fa800326fd0>

Batchnorm + data augmentation


In [125]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [126]:
model = get_model_bn()

In [127]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 12s - loss: 0.1273 - acc: 0.9605 - val_loss: 0.0559 - val_acc: 0.9833
Out[127]:
<keras.callbacks.History at 0x7f37acf896d0>

In [128]:
model.optimizer.lr=0.1

In [129]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0555 - acc: 0.9827 - val_loss: 0.0439 - val_acc: 0.9859
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0455 - acc: 0.9859 - val_loss: 0.0337 - val_acc: 0.9899
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0377 - acc: 0.9882 - val_loss: 0.0332 - val_acc: 0.9890
Epoch 4/4
60000/60000 [==============================] - 13s - loss: 0.0372 - acc: 0.9884 - val_loss: 0.0303 - val_acc: 0.9904
Out[129]:
<keras.callbacks.History at 0x7f37acc5b450>

In [130]:
model.optimizer.lr=0.01

In [131]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0327 - acc: 0.9900 - val_loss: 0.0312 - val_acc: 0.9911
Epoch 2/12
60000/60000 [==============================] - 12s - loss: 0.0290 - acc: 0.9911 - val_loss: 0.0349 - val_acc: 0.9893
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0293 - acc: 0.9912 - val_loss: 0.0452 - val_acc: 0.9853
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0266 - acc: 0.9915 - val_loss: 0.0260 - val_acc: 0.9924
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0236 - acc: 0.9924 - val_loss: 0.0234 - val_acc: 0.9927
Epoch 6/12
60000/60000 [==============================] - 13s - loss: 0.0234 - acc: 0.9927 - val_loss: 0.0305 - val_acc: 0.9901
Epoch 7/12
60000/60000 [==============================] - 12s - loss: 0.0234 - acc: 0.9929 - val_loss: 0.0164 - val_acc: 0.9960
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0198 - acc: 0.9935 - val_loss: 0.0333 - val_acc: 0.9898
Epoch 9/12
60000/60000 [==============================] - 12s - loss: 0.0201 - acc: 0.9939 - val_loss: 0.0184 - val_acc: 0.9940
Epoch 10/12
60000/60000 [==============================] - 12s - loss: 0.0173 - acc: 0.9945 - val_loss: 0.0194 - val_acc: 0.9938
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0323 - val_acc: 0.9904
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0177 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9918
Out[131]:
<keras.callbacks.History at 0x7f37b176aa50>

In [132]:
model.optimizer.lr=0.001

In [133]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0205 - val_acc: 0.9933
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0168 - acc: 0.9950 - val_loss: 0.0194 - val_acc: 0.9942
Epoch 3/12
60000/60000 [==============================] - 12s - loss: 0.0151 - acc: 0.9953 - val_loss: 0.0197 - val_acc: 0.9942
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0135 - acc: 0.9954 - val_loss: 0.0179 - val_acc: 0.9938
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0143 - acc: 0.9953 - val_loss: 0.0257 - val_acc: 0.9925
Epoch 6/12
60000/60000 [==============================] - 12s - loss: 0.0139 - acc: 0.9954 - val_loss: 0.0150 - val_acc: 0.9949
Epoch 7/12
60000/60000 [==============================] - 13s - loss: 0.0127 - acc: 0.9958 - val_loss: 0.0218 - val_acc: 0.9932
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0121 - acc: 0.9962 - val_loss: 0.0264 - val_acc: 0.9917
Epoch 9/12
60000/60000 [==============================] - 13s - loss: 0.0120 - acc: 0.9960 - val_loss: 0.0209 - val_acc: 0.9935
Epoch 10/12
60000/60000 [==============================] - 13s - loss: 0.0130 - acc: 0.9957 - val_loss: 0.0171 - val_acc: 0.9948
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0132 - acc: 0.9958 - val_loss: 0.0227 - val_acc: 0.9932
Epoch 12/12
60000/60000 [==============================] - 12s - loss: 0.0115 - acc: 0.9964 - val_loss: 0.0172 - val_acc: 0.9945
Out[133]:
<keras.callbacks.History at 0x7f37b1789c50>

Batchnorm + dropout + data augmentation


In [79]:
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [80]:
model = get_model_bn_do()

In [81]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.1894 - acc: 0.9419 - val_loss: 0.0605 - val_acc: 0.9815
Out[81]:
<keras.callbacks.History at 0x7fa7cea0d950>

In [82]:
model.optimizer.lr=0.1

In [83]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0766 - acc: 0.9764 - val_loss: 0.0394 - val_acc: 0.9871
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0622 - acc: 0.9806 - val_loss: 0.0360 - val_acc: 0.9885
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0576 - acc: 0.9830 - val_loss: 0.0364 - val_acc: 0.9882
Epoch 4/4
60000/60000 [==============================] - 14s - loss: 0.0512 - acc: 0.9842 - val_loss: 0.0347 - val_acc: 0.9911
Out[83]:
<keras.callbacks.History at 0x7fa7ce2c69d0>

In [84]:
model.optimizer.lr=0.01

In [85]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 14s - loss: 0.0464 - acc: 0.9862 - val_loss: 0.0300 - val_acc: 0.9904
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0474 - acc: 0.9856 - val_loss: 0.0287 - val_acc: 0.9912
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0400 - acc: 0.9880 - val_loss: 0.0408 - val_acc: 0.9879
Epoch 4/12
60000/60000 [==============================] - 14s - loss: 0.0379 - acc: 0.9884 - val_loss: 0.0255 - val_acc: 0.9918
Epoch 5/12
60000/60000 [==============================] - 13s - loss: 0.0394 - acc: 0.9881 - val_loss: 0.0247 - val_acc: 0.9923
Epoch 6/12
60000/60000 [==============================] - 14s - loss: 0.0344 - acc: 0.9893 - val_loss: 0.0267 - val_acc: 0.9921
Epoch 7/12
60000/60000 [==============================] - 14s - loss: 0.0342 - acc: 0.9895 - val_loss: 0.0208 - val_acc: 0.9938
Epoch 8/12
60000/60000 [==============================] - 14s - loss: 0.0291 - acc: 0.9908 - val_loss: 0.0251 - val_acc: 0.9914
Epoch 9/12
60000/60000 [==============================] - 14s - loss: 0.0309 - acc: 0.9907 - val_loss: 0.0253 - val_acc: 0.9919
Epoch 10/12
60000/60000 [==============================] - 14s - loss: 0.0299 - acc: 0.9906 - val_loss: 0.0205 - val_acc: 0.9934
Epoch 11/12
60000/60000 [==============================] - 14s - loss: 0.0276 - acc: 0.9912 - val_loss: 0.0200 - val_acc: 0.9940
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0268 - acc: 0.9918 - val_loss: 0.0201 - val_acc: 0.9929
Out[85]:
<keras.callbacks.History at 0x7fa7ce2e1810>

In [86]:
model.optimizer.lr=0.001

In [89]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0193 - val_acc: 0.9945
Out[89]:
<keras.callbacks.History at 0x7fa7ce5cf290>

Ensembling


In [90]:
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, batches.N, nb_epoch=1, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.1
    model.fit_generator(batches, batches.N, nb_epoch=4, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.01
    model.fit_generator(batches, batches.N, nb_epoch=12, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.001
    model.fit_generator(batches, batches.N, nb_epoch=18, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    return model

In [91]:
models = [fit_model() for i in range(6)]

In [92]:
path = "data/mnist/"
model_path = path + 'models/'

In [93]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [94]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])


 9984/10000 [============================>.] - ETA: 0s

In [95]:
evals.mean(axis=0)


Out[95]:
array([ 0.016,  0.995])

In [96]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [97]:
all_preds.shape


Out[97]:
(6, 10000, 10)

In [98]:
avg_preds = all_preds.mean(axis=0)

In [99]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()


Out[99]:
array(0.9969000220298767, dtype=float32)

In [ ]: