In [4]:
"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data.  For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import pickle as cPickle  # Python 3 has no cPickle; alias the stdlib pickle as a workaround
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple ``(training_data, validation_data,
    test_data)``.  Each entry is a pair ``(images, labels)``: ``images`` is an
    ndarray of 784-value rows (50,000 for training, 10,000 each for validation
    and test) and ``labels`` is the matching ndarray of digit labels."""
    f = gzip.open('mnist.pkl.gz', 'rb')
    # encoding='latin1' lets Python 3 read the Python 2 pickle inside the archive
    training_data, validation_data, test_data = cPickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return ``(training_data, validation_data, test_data)`` as iterables of
    ``(x, y)`` pairs, where ``x`` is a (784, 1) ndarray.  For the training data
    ``y`` is a (10, 1) one-hot vector; for the validation and test data it is
    the bare integer label."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a (10, 1) unit vector with 1.0 at position ``j``: the one-hot
    encoding of digit ``j``."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
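
As a quick sanity check on the structures described in the docstrings, the raw tuples can be inspected directly (variable names assumed for the sketch; the shapes follow from the standard 50,000 / 10,000 / 10,000 split of 784-pixel images):

In [ ]:
tr, va, te = load_data()
print(tr[0].shape, tr[1].shape)      # expected: (50000, 784) (50000,)
print(va[0].shape, te[0].shape)      # expected: (10000, 784) (10000, 784)
print(vectorized_result(3).ravel())  # one-hot vector with a 1.0 at index 3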

In [11]:
%time training_data, validation_data, test_data = load_data_wrapper()


CPU times: user 1.74 s, sys: 572 ms, total: 2.31 s
Wall time: 2.31 s

In [12]:
%time x, y = zip(*training_data)

x = np.array(x)
x = x.reshape(50000, 784)

y = np.array(y)
y = y.reshape(50000, 10)


CPU times: user 50.7 ms, sys: 35.4 ms, total: 86.1 ms
Wall time: 87.7 ms
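
One Python 3 caveat: ``load_data_wrapper`` returns ``zip`` objects, which are one-shot iterators, so the unpacking above consumes ``training_data``. If the ``(x, y)`` pairs need to be traversed more than once, materialise them before unpacking; a minimal sketch of that ordering:

In [ ]:
training_data, validation_data, test_data = load_data_wrapper()
training_data = list(training_data)  # keep the (x, y) pairs reusable
x, y = zip(*training_data)           # training_data can still be iterated later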

In [13]:
import keras


import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils, generic_utils

from sklearn.preprocessing import StandardScaler

np.random.seed(1337) # for reproducibility


def preprocess_data(X, scaler=None):
    """Standardise ``X`` feature-wise; fit a new ``StandardScaler`` when none
    is supplied, and return the transformed data together with the scaler."""
    if not scaler:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler
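
``preprocess_data`` is defined here but never called in this run. A minimal usage sketch, assuming a training matrix like ``x`` above and a held-out matrix like the ``p_x`` built further down:

In [ ]:
x_std, scaler = preprocess_data(x)         # fits a new StandardScaler on x
p_x_std, _ = preprocess_data(p_x, scaler)  # reuses the same scaler, no refit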

In [14]:
nb_classes = y.shape[1]
print(nb_classes, 'classes')

dims = x.shape[1]
print(dims, 'dims')


10 classes
784 dims

In [15]:
print("Building model...")

LAYER_ONE_SIZE = 1024
ITERATIONS = 15

model = Sequential()
model.add(Dense(dims, LAYER_ONE_SIZE, init='glorot_uniform'))
model.add(PReLU((LAYER_ONE_SIZE,)))
model.add(BatchNormalization((LAYER_ONE_SIZE,)))
model.add(Dropout(0.5))

# model.add(Dense(LAYER_ONE_SIZE, LAYER_ONE_SIZE, init='glorot_uniform'))
# model.add(PReLU((LAYER_ONE_SIZE,)))
# model.add(BatchNormalization((LAYER_ONE_SIZE,)))
# model.add(Dropout(0.5))

# model.add(Dense(LAYER_ONE_SIZE, LAYER_ONE_SIZE, init='glorot_uniform'))
# model.add(PReLU((LAYER_ONE_SIZE,)))
# model.add(BatchNormalization((LAYER_ONE_SIZE,)))
# model.add(Dropout(0.5))

model.add(Dense(LAYER_ONE_SIZE, nb_classes, init='glorot_uniform'))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer="adam")

print("Training model...")

%time model.fit(x, y, nb_epoch=ITERATIONS, batch_size=128, validation_split=0.15)


Building model...
Training model...
Train on 42500 samples, validate on 7500 samples
Epoch 0
42500/42500 [==============================] - 18s - loss: 0.4334 - val. loss: 0.1989
Epoch 1
42500/42500 [==============================] - 20s - loss: 0.1457 - val. loss: 0.1462
Epoch 2
42500/42500 [==============================] - 20s - loss: 0.1018 - val. loss: 0.1260
Epoch 3
42500/42500 [==============================] - 18s - loss: 0.0792 - val. loss: 0.1129
Epoch 4
42500/42500 [==============================] - 18s - loss: 0.0636 - val. loss: 0.1016
Epoch 5
42500/42500 [==============================] - 18s - loss: 0.0523 - val. loss: 0.0984
Epoch 6
42500/42500 [==============================] - 18s - loss: 0.0453 - val. loss: 0.0940
Epoch 7
42500/42500 [==============================] - 19s - loss: 0.0393 - val. loss: 0.0914
Epoch 8
42500/42500 [==============================] - 18s - loss: 0.0340 - val. loss: 0.0912
Epoch 9
42500/42500 [==============================] - 19s - loss: 0.0309 - val. loss: 0.0892
Epoch 10
42500/42500 [==============================] - 17s - loss: 0.0252 - val. loss: 0.0868
Epoch 11
42500/42500 [==============================] - 31s - loss: 0.0220 - val. loss: 0.0859
Epoch 12
42500/42500 [==============================] - 33s - loss: 0.0234 - val. loss: 0.0854
Epoch 13
42500/42500 [==============================] - 30s - loss: 0.0202 - val. loss: 0.0846
Epoch 14
42500/42500 [==============================] - 29s - loss: 0.0189 - val. loss: 0.0829
CPU times: user 6min 52s, sys: 34.6 s, total: 7min 27s
Wall time: 5min 33s
Out[15]:
<keras.callbacks.History at 0x11c68c0f0>
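
A note for readers on a newer Keras: the constructors above use the old Keras 0.x signatures, ``Dense(input_dim, output_dim, init=...)`` and ``fit(..., nb_epoch=...)``. A rough sketch of the same network in the later Keras 2-style API (not run here; import paths and keyword names assumed for that version):

In [ ]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, BatchNormalization, PReLU

model2 = Sequential()
model2.add(Dense(LAYER_ONE_SIZE, input_dim=dims, kernel_initializer='glorot_uniform'))
model2.add(PReLU())
model2.add(BatchNormalization())
model2.add(Dropout(0.5))
model2.add(Dense(nb_classes, kernel_initializer='glorot_uniform'))
model2.add(Activation('softmax'))
model2.compile(loss='categorical_crossentropy', optimizer='adam')
model2.fit(x, y, epochs=ITERATIONS, batch_size=128, validation_split=0.15)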

In [16]:
p_x, p_y = zip(*test_data)

p_x = np.array(p_x)
p_x = p_x.reshape(10000, 784)

p_y = np.array(p_y)
p_y = p_y.reshape(10000)

In [17]:
%time preds = model.predict(p_x)
print(preds[1])
print(np.argmax(preds[1]))


10000/10000 [==============================] - 1s     
CPU times: user 1.78 s, sys: 47.6 ms, total: 1.83 s
Wall time: 1.59 s
[  1.14920814e-05   2.98629700e-06   9.99972756e-01   7.24334432e-06
   4.72607638e-09   1.19592658e-07   2.39666688e-06   4.17933766e-09
   2.99687912e-06   3.64769792e-10]
2

In [18]:
pred_idx = [np.argmax(a) for a in preds]

In [19]:
pairs = zip(pred_idx, p_y)
print(len(p_y))


10000

In [20]:
number_correct = sum([int(a == b) for a, b in pairs])
print(number_correct)


9812

In [21]:
number_incorrect = len(p_y) - number_correct
print(number_incorrect)


188

In [22]:
eps = 0.000001  # avoid division by zero
success_rate = number_correct / float((number_correct + number_incorrect + eps))
print(success_rate)


0.98119999990188
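
Since the two counts sum to ``len(p_y)``, the exact ratio is 9812 / 10000 = 0.9812; the ``eps`` guard is what nudges the trailing digits above. The same figure can be computed in one vectorised step from the arrays already in scope:

In [ ]:
accuracy = np.mean(np.argmax(preds, axis=1) == p_y)
print(accuracy)  # 0.9812 on this run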
