In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline

HOME = os.getenv("HOME")
DATA_DIR = HOME + '/data/mnist/'

In [3]:
# One way to show an image: PIL needs uint8 data for an 'RGB' image
r = (np.random.rand(28, 28, 3) * 255).astype(np.uint8)
img = Image.fromarray(r, 'RGB')
img.show()

In [4]:
# Another way: matplotlib renders the same array inline
plt.imshow(r, interpolation='nearest')
plt.show()


Data Prep and Exploration


In [5]:
train = pd.read_csv(DATA_DIR + 'train.csv')

In [21]:
from keras.utils.np_utils import to_categorical

# 80/20 train/validation split; the first column is the label, the rest are pixels
split_ind = int(0.8 * train.shape[0])
X = train.iloc[:, 1:].values / 255.0  # scale pixel values to [0, 1]
# reshape to channels-first (1, 28, 28), as the Theano backend expects
X_reshape = np.reshape(X, (X.shape[0], 1, 28, 28))
X_train = X_reshape[:split_ind, :]
X_val = X_reshape[split_ind:, :]
Y_train = train.iloc[:split_ind, 0].values
Y_val = train.iloc[split_ind:, 0].values
# one-hot encode the digit labels for the softmax / cross-entropy loss
y_train_ohe = to_categorical(Y_train)
y_val_ohe = to_categorical(Y_val)
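
A quick shape check confirms the split and the channels-first layout before training; the expected sizes follow from the 33600/8400 train/validation split reported by fit below.

In [ ]:
print(X_train.shape)      # (33600, 1, 28, 28)
print(y_train_ohe.shape)  # (33600, 10)
print(X_val.shape)        # (8400, 1, 28, 28)
print(y_val_ohe.shape)    # (8400, 10)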

In [7]:
# plot the first s digit images in a 4x4 grid
s = 16
X_plot = np.reshape(X, (X.shape[0], 28, 28))
fig = plt.figure()
for i in range(s):
    fig.add_subplot(4, 4, i + 1)
    # imshow rescales float grayscale data on its own, so no * 255 is needed
    plt.imshow(X_plot[i], interpolation='nearest', cmap='gray')
    plt.xticks([])
    plt.yticks([])


Build the network


In [18]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD

Simple Conv Network


In [22]:
model = Sequential()

# one conv block: 32 3x3 filters with 'same' padding on the channels-first input,
# then 2x2 max pooling, flatten, and a 10-way softmax
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=None))
model.add(Flatten())

model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train_ohe, batch_size=128, nb_epoch=10, validation_data=(X_val, y_val_ohe))


Train on 33600 samples, validate on 8400 samples
Epoch 1/10
33600/33600 [==============================] - 3s - loss: 0.4579 - acc: 0.8793 - val_loss: 0.2171 - val_acc: 0.9355
Epoch 2/10
33600/33600 [==============================] - 2s - loss: 0.1783 - acc: 0.9499 - val_loss: 0.1426 - val_acc: 0.9605
Epoch 3/10
33600/33600 [==============================] - 2s - loss: 0.1161 - acc: 0.9678 - val_loss: 0.1187 - val_acc: 0.9639
Epoch 4/10
33600/33600 [==============================] - 2s - loss: 0.0896 - acc: 0.9748 - val_loss: 0.0899 - val_acc: 0.9729
Epoch 5/10
33600/33600 [==============================] - 2s - loss: 0.0743 - acc: 0.9795 - val_loss: 0.0820 - val_acc: 0.9738
Epoch 6/10
33600/33600 [==============================] - 2s - loss: 0.0648 - acc: 0.9820 - val_loss: 0.0789 - val_acc: 0.9760
Epoch 7/10
33600/33600 [==============================] - 2s - loss: 0.0573 - acc: 0.9844 - val_loss: 0.0689 - val_acc: 0.9785
Epoch 8/10
33600/33600 [==============================] - 2s - loss: 0.0509 - acc: 0.9856 - val_loss: 0.0707 - val_acc: 0.9794
Epoch 9/10
33600/33600 [==============================] - 2s - loss: 0.0461 - acc: 0.9874 - val_loss: 0.0698 - val_acc: 0.9763
Epoch 10/10
33600/33600 [==============================] - 2s - loss: 0.0428 - acc: 0.9879 - val_loss: 0.0673 - val_acc: 0.9786
Out[22]:
<keras.callbacks.History at 0x7f46fe8e03d0>
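
With training done, model.evaluate returns the final validation loss and accuracy for the model still in memory; the numbers should roughly match the epoch-10 line above.

In [ ]:
loss, acc = model.evaluate(X_val, y_val_ohe, verbose=0)
print(loss, acc)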

Purely Dense network


In [23]:
model = Sequential()
# two wide fully connected layers with dropout; input is the flat 784-pixel vector
model.add(Dense(4096, activation='relu', input_dim=784))
model.add(Dropout(0.5))
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
# L2 weight regularization on the output layer (Keras 1.x string shortcut)
model.add(Dense(10, activation='softmax', W_regularizer='l2'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
# the dense network takes the flat (N, 784) matrix rather than the reshaped images
model.fit(X[:split_ind], y_train_ohe, batch_size=32, nb_epoch=20,
          validation_data=(X[split_ind:], y_val_ohe))


Train on 33600 samples, validate on 8400 samples
Epoch 1/20
33600/33600 [==============================] - 24s - loss: 0.3659 - acc: 0.9057 - val_loss: 0.1639 - val_acc: 0.9508
Epoch 2/20
33600/33600 [==============================] - 25s - loss: 0.2177 - acc: 0.9462 - val_loss: 0.1592 - val_acc: 0.9558
Epoch 3/20
33600/33600 [==============================] - 25s - loss: 0.1918 - acc: 0.9558 - val_loss: 0.1101 - val_acc: 0.9674
Epoch 4/20
33600/33600 [==============================] - 24s - loss: 0.1666 - acc: 0.9632 - val_loss: 0.0976 - val_acc: 0.9720
Epoch 5/20
33600/33600 [==============================] - 24s - loss: 0.1526 - acc: 0.9671 - val_loss: 0.1104 - val_acc: 0.9702
Epoch 6/20
33600/33600 [==============================] - 24s - loss: 0.1433 - acc: 0.9703 - val_loss: 0.1055 - val_acc: 0.9745
Epoch 7/20
33600/33600 [==============================] - 24s - loss: 0.1438 - acc: 0.9706 - val_loss: 0.1132 - val_acc: 0.9714
Epoch 8/20
33600/33600 [==============================] - 24s - loss: 0.1312 - acc: 0.9726 - val_loss: 0.1182 - val_acc: 0.9718
Epoch 9/20
33600/33600 [==============================] - 24s - loss: 0.1278 - acc: 0.9752 - val_loss: 0.0959 - val_acc: 0.9782
Epoch 10/20
33600/33600 [==============================] - 24s - loss: 0.1206 - acc: 0.9772 - val_loss: 0.1075 - val_acc: 0.9746
Epoch 11/20
33600/33600 [==============================] - 24s - loss: 0.1173 - acc: 0.9774 - val_loss: 0.1136 - val_acc: 0.9727
Epoch 12/20
 7104/33600 [=====>........................] - ETA: 18s - loss: 0.0938 - acc: 0.9832
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-26-ce3248828ba3> in <module>()
      1 model.fit(X[:split_ind], y_train_ohe, batch_size = 32, nb_epoch = 20,
----> 2          validation_data = (X[split_ind:], y_val_ohe))

/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/models.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)
    625                               shuffle=shuffle,
    626                               class_weight=class_weight,
--> 627                               sample_weight=sample_weight)
    628 
    629     def evaluate(self, x, y, batch_size=32, verbose=1,

/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)
   1122                               verbose=verbose, callbacks=callbacks,
   1123                               val_f=val_f, val_ins=val_ins, shuffle=shuffle,
-> 1124                               callback_metrics=callback_metrics)
   1125 
   1126     def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):

/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, nb_epoch, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics)
    840                 batch_logs['size'] = len(batch_ids)
    841                 callbacks.on_batch_begin(batch_index, batch_logs)
--> 842                 outs = f(ins_batch)
    843                 if type(outs) != list:
    844                     outs = [outs]

/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/backend/theano_backend.pyc in __call__(self, inputs)
    790     def __call__(self, inputs):
    791         assert type(inputs) in {list, tuple}
--> 792         return self.function(*inputs)
    793 
    794 

/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

KeyboardInterrupt: 
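
The run above was cut short by hand at epoch 12. An EarlyStopping callback can end training automatically once val_loss stops improving; a minimal sketch with the same fit call:

In [ ]:
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=2)
model.fit(X[:split_ind], y_train_ohe, batch_size=32, nb_epoch=20,
          validation_data=(X[split_ind:], y_val_ohe),
          callbacks=[early_stop])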

Random Forest


In [28]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(n_estimators=100)
# fitting on the one-hot matrix puts sklearn in multilabel mode, so the
# accuracy_score below is strict subset accuracy across all 10 columns
rf.fit(X[:split_ind, :], y_train_ohe)
preds = rf.predict(X[split_ind:])
accuracy_score(y_val_ohe, preds)


Out[28]:
0.88833333333333331
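
Fitting on the integer labels instead gives the usual multiclass accuracy, a more natural comparison to the Keras models; a sketch reusing Y_train and Y_val from the prep cell:

In [ ]:
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X[:split_ind], Y_train)
accuracy_score(Y_val, rf.predict(X[split_ind:]))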

Deeper Conv Network


In [31]:
model = Sequential()
# three stacked conv layers with growing filter counts, then pool, dropout, classify
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28), activation='relu'))
model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=None))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train_ohe, batch_size=32, nb_epoch=10, validation_data=(X_val, y_val_ohe))


Train on 33600 samples, validate on 8400 samples
Epoch 1/10
33600/33600 [==============================] - 26s - loss: 0.1631 - acc: 0.9490 - val_loss: 0.0629 - val_acc: 0.9808
Epoch 2/10
33600/33600 [==============================] - 26s - loss: 0.0615 - acc: 0.9812 - val_loss: 0.0659 - val_acc: 0.9800
Epoch 3/10
33600/33600 [==============================] - 26s - loss: 0.0464 - acc: 0.9853 - val_loss: 0.0474 - val_acc: 0.9861
Epoch 4/10
33600/33600 [==============================] - 26s - loss: 0.0370 - acc: 0.9883 - val_loss: 0.0513 - val_acc: 0.9849
Epoch 5/10
33600/33600 [==============================] - 26s - loss: 0.0322 - acc: 0.9891 - val_loss: 0.0561 - val_acc: 0.9846
Epoch 6/10
33600/33600 [==============================] - 26s - loss: 0.0252 - acc: 0.9918 - val_loss: 0.0505 - val_acc: 0.9864
Epoch 7/10
33600/33600 [==============================] - 26s - loss: 0.0228 - acc: 0.9926 - val_loss: 0.0501 - val_acc: 0.9870
Epoch 8/10
33600/33600 [==============================] - 26s - loss: 0.0205 - acc: 0.9935 - val_loss: 0.0443 - val_acc: 0.9886
Epoch 9/10
33600/33600 [==============================] - 26s - loss: 0.0162 - acc: 0.9942 - val_loss: 0.0533 - val_acc: 0.9877
Epoch 10/10
33600/33600 [==============================] - 26s - loss: 0.0156 - acc: 0.9949 - val_loss: 0.0481 - val_acc: 0.9886
Out[31]:
<keras.callbacks.History at 0x7f46f4d69110>

Deeper Conv Network with 2 Dense layers


In [32]:
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28), activation='relu'))
model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=None))
model.add(Dropout(0.5))
model.add(Flatten())

# dense head; the input shape is inferred from Flatten, so no input_dim is
# needed here (it would be ignored on a non-first layer anyway)
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax', W_regularizer='l2'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train_ohe, batch_size=32, nb_epoch=10, validation_data=(X_val, y_val_ohe))


Train on 33600 samples, validate on 8400 samples
Epoch 1/10
33600/33600 [==============================] - 38s - loss: 0.3165 - acc: 0.9300 - val_loss: 0.0568 - val_acc: 0.9825
Epoch 2/10
33600/33600 [==============================] - 38s - loss: 0.1190 - acc: 0.9739 - val_loss: 0.0499 - val_acc: 0.9864
Epoch 3/10
33600/33600 [==============================] - 38s - loss: 0.0869 - acc: 0.9809 - val_loss: 0.0447 - val_acc: 0.9869
Epoch 4/10
33600/33600 [==============================] - 38s - loss: 0.0737 - acc: 0.9835 - val_loss: 0.0438 - val_acc: 0.9886
Epoch 5/10
33600/33600 [==============================] - 37s - loss: 0.0701 - acc: 0.9836 - val_loss: 0.0430 - val_acc: 0.9886
Epoch 6/10
33600/33600 [==============================] - 37s - loss: 0.0593 - acc: 0.9874 - val_loss: 0.0436 - val_acc: 0.9892
Epoch 7/10
33600/33600 [==============================] - 37s - loss: 0.0558 - acc: 0.9883 - val_loss: 0.0402 - val_acc: 0.9908
Epoch 8/10
33600/33600 [==============================] - 38s - loss: 0.0468 - acc: 0.9907 - val_loss: 0.0400 - val_acc: 0.9915
Epoch 9/10
33600/33600 [==============================] - 37s - loss: 0.0483 - acc: 0.9899 - val_loss: 0.0489 - val_acc: 0.9888
Epoch 10/10
33600/33600 [==============================] - 37s - loss: 0.0428 - acc: 0.9921 - val_loss: 0.0529 - val_acc: 0.9883
Out[32]:
<keras.callbacks.History at 0x7f46f0eae510>
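
model.summary() prints per-layer output shapes and parameter counts, which makes it easy to see where the dense head adds capacity relative to the conv-only network.

In [ ]:
model.summary()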


In [37]:
test = pd.read_csv(DATA_DIR + 'test.csv')
X_test = test.values / 255.0  # same [0, 1] scaling as the training data
X_test_reshape = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

In [38]:
p = model.predict(X_test_reshape)
#model.save_weights(DATA_DIR + 'mnist_conv.h5')
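
If the weights are saved via the commented line above, they can be restored later into a model built with the same architecture; a sketch using the standard load_weights call:

In [ ]:
# rebuild the identical architecture first, then:
model.load_weights(DATA_DIR + 'mnist_conv.h5')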

In [56]:
# argmax over the 10 class probabilities gives the predicted digit for each row
df = pd.DataFrame({'ImageId': test.index.values + 1, 'Label': np.argmax(p, axis=1)})

In [57]:
df.to_csv(DATA_DIR + 'mnist_conv.csv', index = False)
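
A quick read-back confirms the submission format (ImageId starting at 1, one Label per test row):

In [ ]:
pd.read_csv(DATA_DIR + 'mnist_conv.csv').head()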
