Importing Necessary Modules

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #for plotting

from subprocess import check_output
# Show which data files are available by listing the ../input directory.
print(str(check_output(["ls", "../input"]), "utf8"))

Loading The Dataset

In [2]:
# Load the training set. Per the recorded output it has 785 columns:
# a leading `label` column followed by pixel0..pixel783.
train = pd.read_csv("../input/train.csv")

# Reproduce the inspection shown in the cell output: shape first, then a preview.
print(train.shape)
train.head()

(42000, 785)
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 785 columns

In [3]:
# Load the test set. Per the recorded output it has 784 pixel columns
# (pixel0..pixel783) and no label column.
test = pd.read_csv("../input/test.csv")

# Reproduce the inspection shown in the cell output: shape first, then a preview.
print(test.shape)
test.head()

(28000, 784)
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 784 columns

In [4]:
# Split the training frame by column position. `.ix` has been removed from
# pandas, so use the purely positional `.iloc` indexer instead.
x_train = train.iloc[:, 1:].values.astype('float32')  # every column after the first: pixel values
y_train = train.iloc[:, 0].values.astype('int32')     # first column: the digit label (target)
x_test = test.values.astype('float32')                # test frame has pixel columns only

Scaling The Data

In [5]:
# Apply the same affine rescaling to both splits: divide by 255, then shift by 0.01.
# NOTE(review): assuming raw pixels lie in 0..255, values end up in [0.01, 1.01] - confirm
# the offset is intended. Re-running this cell rescales already-scaled data.
x_train, x_test = (pixels / 255 + 0.01 for pixels in (x_train, x_test))

In [6]:

array([1, 0, 1, ..., 7, 6, 9], dtype=int32)

Printing the shape of the Datasets

In [7]:
# Report the full training array shape, then the number of rows in each split.
print('x_train shape:', np.shape(x_train))
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

x_train shape: (42000, 784)
42000 train samples
28000 test samples

In [8]:
# Turn each flat row into a 28x28 single-channel image; the sample axis is kept as-is.
X_train = x_train.reshape(x_train.shape[:1] + (28, 28, 1))
X_test = x_test.reshape(x_test.shape[:1] + (28, 28, 1))

In [9]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
# Import BatchNormalization from keras.layers: it is exported there across
# Keras releases, whereas the keras.layers.normalization submodule path fails
# on newer versions.
from keras.layers import BatchNormalization

# Training hyper-parameters and input geometry used by the cells below.
batch_size = 128           # samples per gradient update, passed to model.fit
num_classes = 10           # width of the one-hot targets and of the softmax output layer
epochs = 4                 # number of passes over the training data
input_shape = (28, 28, 1)  # per-sample shape given to the first Conv2D layer

In [10]:
# One-hot encode the integer class labels into binary rows of width num_classes.
# Note: this overwrites y_train, so the cell must not be re-run on its own output.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)

Convolutional Neural Network Model

In [11]:
# Convolutional classifier for inputs of shape `input_shape`.
# The layer stack follows the recorded model summary: three conv layers with two
# max-pooling stages, then batch-norm/dropout, a flatten, and a dense head that
# ends in a softmax over `num_classes`.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
# NOTE(review): the summary lists this Dropout layer but not its rate; 0.25 is an
# assumed value - confirm or tune.
model.add(Dropout(0.25))
# Flatten is required before the dense head: without it Dense acts on the last
# axis only and the output stays 4-D, which cannot match the (N, num_classes) targets.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
# NOTE(review): rate likewise not recorded; 0.5 is an assumed value - confirm or tune.
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# The model must be compiled before fit(). One-hot targets with a softmax output
# call for categorical cross-entropy, and the training log reports accuracy.
# NOTE(review): the original optimizer is not recorded; 'adam' is an assumption.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


In [12]:

Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
conv2d_3 (Conv2D)            (None, 10, 10, 64)        36928     
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
batch_normalization_1 (Batch (None, 5, 5, 64)          256       
dropout_1 (Dropout)          (None, 5, 5, 64)          0         
flatten_1 (Flatten)          (None, 1600)              0         
dense_1 (Dense)              (None, 128)               204928    
batch_normalization_2 (Batch (None, 128)               512       
dropout_2 (Dropout)          (None, 128)               0         
dense_2 (Dense)              (None, 10)                1290      
Total params: 262,730
Trainable params: 262,346
Non-trainable params: 384

In [13]:
# Train on the reshaped images, holding out 20% of the training data for validation.
# The returned History object is kept in `h` so the learning curves can be plotted.
h = model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_split=0.2)

Train on 33600 samples, validate on 8400 samples
Epoch 1/4
33600/33600 [==============================] - 223s - loss: 0.2229 - acc: 0.9332 - val_loss: 0.8552 - val_acc: 0.6944
Epoch 2/4
33600/33600 [==============================] - 217s - loss: 0.0734 - acc: 0.9786 - val_loss: 0.0409 - val_acc: 0.9873
Epoch 3/4
33600/33600 [==============================] - 213s - loss: 0.0516 - acc: 0.9841 - val_loss: 0.0615 - val_acc: 0.9813
Epoch 4/4
33600/33600 [==============================] - 217s - loss: 0.0445 - acc: 0.9861 - val_loss: 0.0332 - val_acc: 0.9894

In [14]:
# Show which metrics were recorded during training.
print(h.history.keys())

# Older Keras records accuracy under 'acc'/'val_acc'; newer releases use
# 'accuracy'/'val_accuracy'. Pick whichever key is present.
acc_key = 'acc' if 'acc' in h.history else 'accuracy'

# One figure per metric, each with a training curve and a validation curve,
# so the second title no longer overwrites the first.
for plot_title, y_label, metric_key in (
    ('model accuracy', 'accuracy', acc_key),  # "Accuracy"
    ('model loss', 'loss', 'loss'),           # "Loss"
):
    fig, ax = plt.subplots()
    ax.plot(h.history[metric_key])
    ax.plot(h.history['val_' + metric_key])
    ax.set_title(plot_title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validation'], loc='upper left')
    plt.show()

dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

In [15]:
# Predict a digit for every test image. `Sequential.predict_classes` no longer
# exists in newer Keras; taking the argmax over the softmax outputs gives the
# same class indices and works on old and new versions.
class_probabilities = model.predict(X_test, verbose=0)
predictions = np.argmax(class_probabilities, axis=1)

# Build the submission table: ImageId is 1-based and follows the test-set row order.
submissions = pd.DataFrame({"ImageId": np.arange(1, len(predictions) + 1),
                            "Label": predictions})
submissions.to_csv("asd.csv", index=False, header=True)