Classify handwritten digits with Keras (MXNet backend)

Download the MNIST dataset from the Internet
Preprocessing the dataset
Softmax Regression
A small Convolutional Neural Network



In [ ]:

    
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
import pandas as pd
import sklearn
import os
import requests
from tqdm._tqdm_notebook import tqdm_notebook
import tarfile

1. Download the MNIST dataset from the Internet

I've packaged the dataset as a gzipped tar archive. The cell below downloads and extracts it.



In [ ]:

    
def download_file(url,file):
    """Stream the content at ``url`` into the local path ``file``.

    A notebook progress bar is shown while downloading. After the transfer,
    the number of bytes written is compared with the ``content-length``
    header (when the server sent one) and a message is printed on mismatch.

    Parameters
    ----------
    url : str
        Address of the resource to download.
    file : str
        Destination path; it is opened in binary write mode and overwritten.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page is
        not silently saved in place of the expected file.
    """
    block_size = 1024

    # Streaming, so we can iterate over the response chunk by chunk.
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()

        # Total size in bytes (0 when the header is absent).
        total_size = int(r.headers.get('content-length', 0))

        # Round *up* to whole blocks: the last block is usually partial.
        # With an unknown size, pass None so tqdm shows an open-ended bar.
        n_blocks = int(np.ceil(total_size / block_size)) if total_size else None

        wrote = 0
        with open(file, 'wb') as f:
            for data in tqdm_notebook(r.iter_content(block_size), total=n_blocks, unit='KB', unit_scale=True):
                wrote += len(data)
                f.write(data)
    finally:
        # Release the streamed connection even if writing fails.
        r.close()

    if total_size != 0 and wrote != total_size:
        print("ERROR, something went wrong")

# Location of the archive and the local file name it is saved under.
url = "https://github.com/chi-hung/PythonTutorial/raw/master/datasets/mnist.tar.gz"
file = "mnist.tar.gz"

print('Retrieving the MNIST dataset...')
download_file(url,file)

print('Extracting the MNIST dataset...')
# The context manager closes the archive even if extraction fails.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives from a source you trust.
with tarfile.open(file) as tar:
    tar.extractall()
print('Completed fetching the MNIST dataset.')



In [ ]:

10 folders of images will be extracted from the downloaded tar file.

2. Preprocessing the dataset



In [ ]:

    
def filePathsGen(rootPath):
    """Walk ``rootPath`` and collect a ``(class, path)`` pair for every file.

    The class label is the integer name of the first sub-folder below
    ``rootPath`` that contains the file (e.g. ``mnist/3/x.png`` -> ``3``).
    It is derived from the relative path, so ``rootPath`` may be given with or
    without a trailing separator and folder names may have several digits.
    Files lying directly in ``rootPath`` have no class folder and are skipped.

    Parameters
    ----------
    rootPath : str
        Directory whose sub-folders are named after the integer class labels.

    Returns
    -------
    dirs : list of list of str
        The ``dirNames`` list reported by ``os.walk`` for each visited folder.
    paths : list of (int, str)
        One ``(class, full path)`` tuple per file found below a class folder.

    Raises
    ------
    ValueError
        If a first-level sub-folder containing files has a non-integer name.
    """
    paths=[]
    dirs=[]
    for dirPath,dirNames,fileNames in os.walk(rootPath):
        dirs.append(dirNames)

        relDir=os.path.relpath(dirPath,rootPath)
        if relDir==os.curdir or not fileNames:
            # Either the root itself (no class folder) or nothing to label.
            continue

        # The first path component below the root is the class folder.
        label=int(relDir.split(os.sep)[0])
        for fileName in fileNames:
            paths.append((label,os.path.join(dirPath,fileName)))
    return dirs,paths



In [ ]:

    
# Gather every (class, path) pair found below the extracted 'mnist/' folder.
dirs, paths = filePathsGen('mnist/')

# Keep the pairs in a two-column DataFrame for convenient handling.
dfPath = pd.DataFrame(data=paths, columns=['class', 'path'])

# Preview the first five rows.
dfPath.head(n=5)

How many digit classes are there, and how many figures belong to each class?



In [ ]:

    
# Count the image paths per class and give the count column a readable name.
dfCountPerClass = (
    dfPath.groupby('class')
    .count()
    .rename(columns={'path': 'amount of figures'})
)

# One bar per class, with horizontal tick labels.
dfCountPerClass.plot.bar(rot=0)

Split the image paths into train($70\%$), val($15\%$), test($15\%$)



In [ ]:

    
# Fixed seed so that the train/val/test split is identical on every run.
SPLIT_SEED=0

# Sample 70% of the rows as the train dataset; the remaining 30% become the
# provisional test dataset. Dropping by index assumes dfPath has unique index
# labels.
train=dfPath.sample(frac=0.7,random_state=SPLIT_SEED)
test=dfPath.drop(train.index)

# take 50% of the test dataset as the validation dataset
val=test.sample(frac=1/2,random_state=SPLIT_SEED)
test=test.drop(val.index)

# let's check the length of the train, val and test dataset.
print('number of all figures = {:10}.'.format(len(dfPath)))
print('number of train figures= {:9}.'.format(len(train)))
print('number of val figures= {:10}.'.format(len(val)))
print('number of test figures= {:9}.'.format(len(test)))

# let's take a look: plotting (up to) 3 figures from the train dataset
for path in train['path'].iloc[:3]:
    img=plt.imread(path)
    plt.imshow(img,cmap="gray")
    plt.axis("off")
    plt.show()

Load images into RAM



In [ ]:

    
def dataLoad(dfPath):
    """Read every image listed in ``dfPath`` into one float32 array.

    Parameters
    ----------
    dfPath : pandas.DataFrame
        Must provide a ``'path'`` column (image file paths) and a ``'class'``
        column (labels). Each image is stored into a 28x28 slot.

    Returns
    -------
    x : numpy.ndarray
        Array of shape ``(len(dfPath), 28, 28)`` and dtype ``float32`` holding
        the pixel values scaled to the range [0, 1].
    y : numpy.ndarray
        The values of the ``'class'`` column, in the same row order as ``x``.
    """
    paths=dfPath['path'].values
    x=np.zeros((len(paths),28,28),dtype=np.float32 )

    for j,path in enumerate(paths):
        img=plt.imread(path)
        # Per the matplotlib documentation, imread returns PNG files as floats
        # already in [0, 1] and other formats as integer arrays. Only integer
        # data is rescaled, so PNG input is not divided by 255 a second time.
        if np.issubdtype(img.dtype,np.integer):
            img=img/255
        x[j,:,:]=img

    y=dfPath['class'].values
    return x,y



In [ ]:

    
# Load the images and labels of the three splits, in train / val / test order.
(train_x, train_y), (val_x, val_y), (test_x, test_y) = [
    dataLoad(split) for split in (train, val, test)
]

Remark: loading all images into RAM might take a while.



In [ ]:

    
# Report the shapes of the image and label arrays of each split.
print("tensor shapes:\n")
for tag, images, labels in (
    ('train:', train_x, train_y),
    ('val  :', val_x, val_y),
    ('test :', test_x, test_y),
):
    print(tag, images.shape, labels.shape)

3. Softmax Regression



In [ ]:

    
from keras.models import Sequential
from keras.layers import Dense,Flatten
from keras.optimizers import SGD

One-hot encode the labels:



In [ ]:

    
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

# Fit the encoder on the training labels only, then reuse that same fitted
# mapping for the validation and test labels. Re-fitting on each split could
# yield a different column layout whenever a split happens to miss a class.
train_y_onehot = np.float32( enc.fit_transform(train_y.reshape(-1,1)) \
                   .toarray() )

val_y_onehot = np.float32( enc.transform(val_y.reshape(-1,1)) \
                 .toarray() )

test_y_onehot = np.float32( enc.transform(test_y.reshape(-1,1)) \
                  .toarray() )

Construct the model:



In [ ]:

    
# Softmax regression: flatten each 28x28 image and feed it to a single dense
# layer with 10 softmax outputs.
model = Sequential()
model.add(Flatten(input_shape=(28,28)))
model.add(Dense(10, activation='softmax') )

# Pass the configured optimizer object itself. The string 'sgd' would create
# a fresh SGD instance with default settings and ignore lr=0.2.
sgd=SGD(lr=0.2, momentum=0.0, decay=0.0)
model.compile(optimizer=sgd,
      loss='categorical_crossentropy',
      metrics=['accuracy'])

More details about the constructed model:



In [ ]:

    
# Print the summary of the model constructed and compiled in the previous cell.
model.summary()

Train the model:



In [ ]:

    
# Train for 20 epochs in mini-batches of 128, validating on the val split.
# The returned history object is kept for plotting.
hist = model.fit(
    x=train_x,
    y=train_y_onehot,
    batch_size=128,
    epochs=20,
    validation_data=(val_x, val_y_onehot),
)

See how the accuracy climbs during training:



In [ ]:

    
# Draw the recorded training and validation accuracy curves on one figure.
for history_key, curve_label in (('acc', 'accuracy'), ('val_acc', 'val accuracy')):
    plt.plot(hist.history[history_key], ms=5, marker='o', label=curve_label)
plt.legend()
plt.show()

Now, you'll probably want to evaluate or save the trained model.



In [ ]:

    
# Evaluate on the test dataset; score[0] is reported as the loss and score[1]
# as the accuracy.
score = model.evaluate(x=test_x, y=test_y_onehot, batch_size=128)
report_lines = (
    "LOSS (evaluated on the test dataset)=     {}",
    "ACCURACY (evaluated on the test dataset)= {}",
)
for template, value in zip(report_lines, score):
    print(template.format(value))

Save model architecture & weights:



In [ ]:

    
import json

# Store the architecture: the value returned by model.to_json() is serialized
# with json and written to disk.
# NOTE(review): to_json() presumably already returns a JSON string, so the
# file holds a JSON-encoded string; the loading cell's json.load undoes this.
with open('first_try.json', 'w') as jsOut:
    jsOut.write(json.dumps(model.to_json()))

# Store the weights in a separate HDF5 file.
model.save_weights('first_try.h5')

Load the saved model architecture & weights:



In [ ]:

    
from keras.models import model_from_json



In [ ]:

    
# Read the stored architecture description back from disk.
with open('first_try.json', 'r') as jsIn:
    model_architecture = json.loads(jsIn.read())

# Rebuild the model from that description, then restore its weights.
model_new = model_from_json(model_architecture)
model_new.load_weights('first_try.h5')



In [ ]:

    
# Print the summary of the model rebuilt from 'first_try.json'.
model_new.summary()

Output the classification report (see if the trained model works well on the test data):



In [ ]:

    
from sklearn.metrics import classification_report

# The predicted class is the index of the largest model output per sample.
pred_y = np.argmax(model.predict(test_x), axis=1)

# Compare the predictions with the true test labels.
report = classification_report(test_y, pred_y)
print(report)

4. A small Convolutional Neural Network

Reshape the tensors (this step is necessary, because the CNN model wants the input tensor to be 4D):



In [ ]:

    
# Insert a new axis of length 1 at position 1 of each image array.
train_x = train_x[:, np.newaxis, ...]
val_x = val_x[:, np.newaxis, ...]
test_x = test_x[:, np.newaxis, ...]

Create the model:



In [ ]:

    
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten,Conv2D, MaxPooling2D
from keras.layers import Activation
from keras.optimizers import SGD

in_shape = (1, 28, 28)

# ========== BEGIN TO CREATE THE MODEL ==========
model = Sequential([
    # feature extraction (2 conv layers)
    Conv2D(32, (3, 3), activation='relu', input_shape=in_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Flatten(),
    # classification (2 dense layers)
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
# ========== COMPLETED THE MODEL CREATION========

# Compile the model before training.
# NOTE(review): `context` is presumably the MXNet backend's device selector
# (here the first GPU) -- confirm that it is supported by the installed Keras.
cnn_optimizer = SGD(lr=0.01, momentum=0.1)
model.compile(
    loss='categorical_crossentropy',
    optimizer=cnn_optimizer,
    metrics=['accuracy'],
    context=['gpu(0)'],
)

Train the model:



In [ ]:

    
%%time 
hist = model.fit(
    x=train_x,
    y=train_y_onehot,
    batch_size=32,
    epochs=20,
    validation_data=(val_x, val_y_onehot),
)

See how the accuracy climbs during training:



In [ ]:

    
# Draw the recorded training and validation accuracy curves on one figure.
for history_key, curve_label in (('acc', 'accuracy'), ('val_acc', 'val accuracy')):
    plt.plot(hist.history[history_key], ms=5, marker='o', label=curve_label)
plt.legend()
plt.show()

Output the classification report (see if the trained model works well on the test data):



In [ ]:

    
from sklearn.metrics import classification_report

# The predicted class is the index of the largest model output per sample.
pred_y = np.argmax(model.predict(test_x), axis=1)

# Compare the predictions with the true test labels.
report = classification_report(test_y, pred_y)
print(report)



In [ ]: