In [10]:
# Core numerics and data loading.
import numpy as np
import pandas as pd
# NOTE(review): theano is never referenced directly below — presumably it is
# imported so Keras uses the Theano backend; confirm before removing.
import theano
# NOTE(review): Sequential, AveragePooling2D, normalization, Reshape and
# optimizers are imported but unused in this notebook — candidates for cleanup.
from keras.models import Sequential,Model
from keras.layers import Dropout ,Activation,AveragePooling2D, normalization,Convolution2D,MaxPooling2D
from keras.layers import Dense, Flatten, Reshape, Input
from keras.layers.merge import add
from keras import optimizers
from keras.utils import np_utils
from matplotlib import pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Kaggle MNIST training set: one row per image, column 0 is the label,
# columns 1..784 are flattened 28x28 pixel intensities.
# NOTE(review): absolute local path — adjust TRAIN_CSV_PATH for your machine.
TRAIN_CSV_PATH = '/home/vasu/all_projects/ML/MNIST/mnist_kaggle/train.csv'
x = pd.read_csv(TRAIN_CSV_PATH)

In [3]:
# Split the loaded frame into pixel features and digit labels.
X = np.array(x)
x = X[:, 1:]  # 784 pixel columns per image
y = X[:, 0]   # first CSV column is the digit label
# Use str.format so the line prints the same under Python 2 and 3
# (the original bare `print x.shape,y.shape` is Python-2-only syntax).
print('{} {}'.format(x.shape, y.shape))


(42000, 784) (42000,)

In [4]:
# Scale raw 0-255 pixel intensities into [0, 1] for training stability.
x = x/255.0

In [5]:
# Restore each flat 784-pixel row to its 2-D image structure
# (28x28, single greyscale channel), letting numpy infer the batch size,
# and one-hot encode the labels for softmax training.
x = x.reshape((-1, 28, 28, 1))
y = np_utils.to_categorical(y)

In [6]:
# Small residual CNN for 28x28x1 MNIST digits, built with the functional API.
inp = Input(shape=(28, 28, 1))

# Stage 1: 32-filter conv, then a conv-conv residual branch added back
# onto the stage input (skip connection).
x1 = Convolution2D(32, (3, 3))(inp)
r1_a1 = Activation('relu')(x1)
r1_w1 = Convolution2D(32, (3, 3), padding='same')(r1_a1)
r1_a2 = Activation('relu')(r1_w1)
r1_d1 = Dropout(0.2)(r1_a2)
r1_w2 = Convolution2D(32, (3, 3), padding='same')(r1_d1)
out1 = add([x1, r1_w2])

# Stage 2: same residual pattern with 16 filters.
x2 = Convolution2D(16, (3, 3))(out1)
r2_a1 = Activation('relu')(x2)
r2_w1 = Convolution2D(16, (3, 3), padding='same')(r2_a1)
r2_a2 = Activation('relu')(r2_w1)
r2_d1 = Dropout(0.2)(r2_a2)
r2_w2 = Convolution2D(16, (3, 3), padding='same')(r2_d1)
out2 = add([x2, r2_w2])

# Head: one more conv, spatial max pooling, then a small dense classifier.
x3 = Convolution2D(8, (3, 3))(out2)
acti = Activation('relu')(x3)
max_pool = MaxPooling2D((2, 2))(acti)  # renamed: was `av_pool` but is max pooling
f1 = Flatten()(max_pool)
fc1 = Dense(32)(f1)
a1 = Activation('relu')(fc1)
fc2 = Dense(10)(a1)
a2 = Activation('softmax')(fc2)  # 10-way digit probabilities

model = Model(outputs=a2, inputs=inp)
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_1 (InputLayer)             (None, 28, 28, 1)     0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 26, 26, 32)    320         input_1[0][0]                    
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 26, 26, 32)    0           conv2d_1[0][0]                   
____________________________________________________________________________________________________
conv2d_2 (Conv2D)                (None, 26, 26, 32)    9248        activation_1[0][0]               
____________________________________________________________________________________________________
activation_2 (Activation)        (None, 26, 26, 32)    0           conv2d_2[0][0]                   
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 26, 26, 32)    0           activation_2[0][0]               
____________________________________________________________________________________________________
conv2d_3 (Conv2D)                (None, 26, 26, 32)    9248        dropout_1[0][0]                  
____________________________________________________________________________________________________
add_1 (Add)                      (None, 26, 26, 32)    0           conv2d_1[0][0]                   
                                                                   conv2d_3[0][0]                   
____________________________________________________________________________________________________
conv2d_4 (Conv2D)                (None, 24, 24, 16)    4624        add_1[0][0]                      
____________________________________________________________________________________________________
activation_3 (Activation)        (None, 24, 24, 16)    0           conv2d_4[0][0]                   
____________________________________________________________________________________________________
conv2d_5 (Conv2D)                (None, 24, 24, 16)    2320        activation_3[0][0]               
____________________________________________________________________________________________________
activation_4 (Activation)        (None, 24, 24, 16)    0           conv2d_5[0][0]                   
____________________________________________________________________________________________________
dropout_2 (Dropout)              (None, 24, 24, 16)    0           activation_4[0][0]               
____________________________________________________________________________________________________
conv2d_6 (Conv2D)                (None, 24, 24, 16)    2320        dropout_2[0][0]                  
____________________________________________________________________________________________________
add_2 (Add)                      (None, 24, 24, 16)    0           conv2d_4[0][0]                   
                                                                   conv2d_6[0][0]                   
____________________________________________________________________________________________________
conv2d_7 (Conv2D)                (None, 22, 22, 8)     1160        add_2[0][0]                      
____________________________________________________________________________________________________
activation_5 (Activation)        (None, 22, 22, 8)     0           conv2d_7[0][0]                   
____________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)   (None, 11, 11, 8)     0           activation_5[0][0]               
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 968)           0           max_pooling2d_1[0][0]            
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 32)            31008       flatten_1[0][0]                  
____________________________________________________________________________________________________
activation_6 (Activation)        (None, 32)            0           dense_1[0][0]                    
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 10)            330         activation_6[0][0]               
____________________________________________________________________________________________________
activation_7 (Activation)        (None, 10)            0           dense_2[0][0]                    
====================================================================================================
Total params: 60,578
Trainable params: 60,578
Non-trainable params: 0
____________________________________________________________________________________________________

In [7]:
# Cross-entropy matches the one-hot labels / softmax output; track accuracy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
# Train for 20 epochs, holding out the last 20% of rows as a validation split.
# NOTE(review): no random seed is set, so runs are not exactly reproducible.
hist = model.fit(x, y,epochs = 20,shuffle=True,batch_size=256,validation_split=0.20)


Train on 33600 samples, validate on 8400 samples
Epoch 1/20
33600/33600 [==============================] - 144s - loss: 0.4299 - acc: 0.8648 - val_loss: 0.1331 - val_acc: 0.9586
Epoch 2/20
33600/33600 [==============================] - 135s - loss: 0.1131 - acc: 0.9668 - val_loss: 0.0805 - val_acc: 0.9749
Epoch 3/20
33600/33600 [==============================] - 135s - loss: 0.0798 - acc: 0.9760 - val_loss: 0.0708 - val_acc: 0.9762
Epoch 4/20
33600/33600 [==============================] - 135s - loss: 0.0620 - acc: 0.9808 - val_loss: 0.0592 - val_acc: 0.9821
Epoch 5/20
33600/33600 [==============================] - 135s - loss: 0.0497 - acc: 0.9843 - val_loss: 0.0586 - val_acc: 0.9813
Epoch 6/20
33600/33600 [==============================] - 135s - loss: 0.0437 - acc: 0.9861 - val_loss: 0.0647 - val_acc: 0.9813
Epoch 7/20
33600/33600 [==============================] - 135s - loss: 0.0377 - acc: 0.9882 - val_loss: 0.0585 - val_acc: 0.9827
Epoch 8/20
33600/33600 [==============================] - 135s - loss: 0.0363 - acc: 0.9884 - val_loss: 0.0495 - val_acc: 0.9846
Epoch 9/20
33600/33600 [==============================] - 136s - loss: 0.0309 - acc: 0.9902 - val_loss: 0.0468 - val_acc: 0.9862
Epoch 10/20
33600/33600 [==============================] - 138s - loss: 0.0235 - acc: 0.9921 - val_loss: 0.0654 - val_acc: 0.9814
Epoch 11/20
33600/33600 [==============================] - 154s - loss: 0.0249 - acc: 0.9915 - val_loss: 0.0482 - val_acc: 0.9867
Epoch 12/20
33600/33600 [==============================] - 154s - loss: 0.0210 - acc: 0.9935 - val_loss: 0.0536 - val_acc: 0.9855
Epoch 13/20
33600/33600 [==============================] - 154s - loss: 0.0178 - acc: 0.9940 - val_loss: 0.0517 - val_acc: 0.9856
Epoch 14/20
33600/33600 [==============================] - 156s - loss: 0.0157 - acc: 0.9952 - val_loss: 0.0517 - val_acc: 0.9870
Epoch 15/20
33600/33600 [==============================] - 154s - loss: 0.0164 - acc: 0.9940 - val_loss: 0.0584 - val_acc: 0.9862
Epoch 16/20
33600/33600 [==============================] - 157s - loss: 0.0154 - acc: 0.9947 - val_loss: 0.0557 - val_acc: 0.9856
Epoch 17/20
33600/33600 [==============================] - 154s - loss: 0.0139 - acc: 0.9951 - val_loss: 0.0559 - val_acc: 0.9868
Epoch 18/20
33600/33600 [==============================] - 157s - loss: 0.0116 - acc: 0.9961 - val_loss: 0.0559 - val_acc: 0.9854
Epoch 19/20
33600/33600 [==============================] - 156s - loss: 0.0112 - acc: 0.9962 - val_loss: 0.0512 - val_acc: 0.9876
Epoch 20/20
33600/33600 [==============================] - 155s - loss: 0.0126 - acc: 0.9956 - val_loss: 0.0600 - val_acc: 0.9854

In [12]:
# Training curves: accuracy and loss for train vs. validation, per epoch.
# Labels + legend so the four series are distinguishable; plt.show()
# suppresses the stray Line2D repr the bare plot calls leaked as output.
plt.plot(hist.history['acc'], label='train acc')
plt.plot(hist.history['val_acc'], label='val acc')
plt.plot(hist.history['loss'], label='train loss')
plt.plot(hist.history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.legend()
plt.show()


Out[12]:
[<matplotlib.lines.Line2D at 0x7fde062695d0>]