Dropout regularization with gluon


In [1]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from tqdm import tqdm_notebook as tqdm

Context


In [2]:
ctx = mx.cpu()
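
If a GPU is available, the rest of the notebook runs unchanged on it by switching the context. A minimal sketch, assuming MXNet 1.2+ (which provides mx.context.num_gpus()):

# Hypothetical alternative: use the first GPU when one is detected, else fall back to CPU
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()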

The MNIST Dataset


In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10

In [4]:
def transform(data, label):
    return data.astype(np.float32) / 255, label.astype(np.float32)

In [5]:
train_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size=batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size=batch_size,
                                  shuffle=False)
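
As a quick sanity check (not part of the original run), one batch can be pulled from the loader to confirm the shapes; each MNIST image arrives as 28×28×1, which is why it is flattened to 784 features before the dense layers:

# Sketch: inspect one batch (expected shapes: data (64, 28, 28, 1), label (64,))
for data, label in train_data:
    print(data.shape, label.shape)
    break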

Define the model


In [6]:
num_hidden = 256

In [7]:
net = gluon.nn.Sequential()
with net.name_scope():
    ###########################
    # Adding first hidden layer
    ###########################
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    ###########################
    # Adding dropout with rate .5 to the first hidden layer
    ###########################
    net.add(gluon.nn.Dropout(rate=0.5))

    ###########################
    # Adding second hidden layer
    ###########################
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    ###########################
    # Adding dropout with rate .5 to the second hidden layer
    ###########################
    net.add(gluon.nn.Dropout(rate=0.5))
    ###########################
    # Adding the output layer
    ###########################
    net.add(gluon.nn.Dense(units=num_outputs))
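
Printing the Sequential block is a quick way to confirm the layer order and dropout rates; the dense layers' input shapes stay unspecified until the first forward pass because Gluon defers shape inference:

print(net)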

Parameter initialization


In [8]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
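
To see the Dropout layers in action, the same input can be sent through the network twice: under train_mode the dropout masks are re-sampled, so the two outputs differ, while under predict_mode dropout is a no-op and the outputs match. A minimal sketch (the random input x is purely illustrative):

x = mx.nd.random.uniform(shape=(1, num_inputs), ctx=ctx)

with mx.autograd.train_mode():
    # dropout active: each forward pass uses a fresh mask
    print(net(x) - net(x))   # generally non-zero

with mx.autograd.predict_mode():
    # dropout disabled: forward passes are deterministic
    print(net(x) - net(x))   # all zeros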

In [9]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
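
SoftmaxCrossEntropyLoss applies a softmax to the raw network outputs and takes the negative log-likelihood of the true class. A small worked check (the numbers are chosen only for illustration):

# Sketch: for one example the loss equals -log(softmax(logits)[true_class])
logits = mx.nd.array([[2.0, 0.5, 0.1]])
lbl = mx.nd.array([0])
print(softmax_cross_entropy(logits, lbl))        # loss computed by gluon
print(-mx.nd.log(mx.nd.softmax(logits))[0, 0])   # same value computed by hand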

In [10]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

Evaluation


In [11]:
def evaluate_accuracy(data_iterator, net, mode='train'):
    # Accuracy over a data iterator.
    # mode='train' runs the net under train_mode, so the Dropout layers stay active;
    # mode='test' runs it under predict_mode, so dropout is disabled.
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx).reshape([-1, 784])
        label = label.as_in_context(ctx)
        if mode == 'train':
            with mx.autograd.train_mode():
                output = net(data)
        else:
            with mx.autograd.predict_mode():
                output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

Training


In [12]:
epochs = 10
smoothing_constant = .01

In [13]:
for e in tqdm(range(epochs)):
    for i, (data, label) in tqdm(enumerate(train_data)):
        data = data.as_in_context(ctx).reshape([-1, 784])
        label = label.as_in_context(ctx)
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
            loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net, mode='test')
    train_accuracy = evaluate_accuracy(train_data, net, mode='train')
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))


Epoch 0. Loss: 0.32520477357779043, Train_acc 0.9060333333333334, Test_acc 0.9397
Epoch 1. Loss: 0.23943950985101778, Train_acc 0.9341, Test_acc 0.9558
Epoch 2. Loss: 0.20008245042431802, Train_acc 0.9466666666666667, Test_acc 0.9642
Epoch 3. Loss: 0.17831271755162717, Train_acc 0.9491333333333334, Test_acc 0.9691
Epoch 4. Loss: 0.15330176094461315, Train_acc 0.9566166666666667, Test_acc 0.9714
Epoch 5. Loss: 0.141996172699063, Train_acc 0.95895, Test_acc 0.9717
Epoch 6. Loss: 0.13022403712175956, Train_acc 0.96155, Test_acc 0.9757
Epoch 7. Loss: 0.12963183194639724, Train_acc 0.96315, Test_acc 0.9769
Epoch 8. Loss: 0.1198122455772697, Train_acc 0.96525, Test_acc 0.9756
Epoch 9. Loss: 0.11669562747581484, Train_acc 0.9665833333333333, Test_acc 0.9756
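
Note that Train_acc above is evaluated with mode='train', i.e. with dropout still active. Re-evaluating the training set in predict mode (a quick check, not part of the original run) would typically give a slightly higher number, since the full network is used:

# Sketch: training-set accuracy with dropout disabled
print(evaluate_accuracy(train_data, net, mode='test'))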

Predict for the first example of the first test batch


In [14]:
for i, (data, label) in enumerate(test_data):
    data = data[0].as_in_context(ctx).reshape([-1, 784])
    label = label[0].as_in_context(ctx)
    with mx.autograd.predict_mode():
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        print(predictions)
    break


[7.]
<NDArray 1 @cpu(0)>

Testing that the accuracy calculation is reproducible (dropout disabled in predict mode)


In [15]:
test_accuracy = evaluate_accuracy(test_data, net, mode='test')

In [16]:
test_accuracy


Out[16]:
0.9756

In [17]:
test_accuracy = evaluate_accuracy(test_data, net, mode='test')

In [18]:
test_accuracy


Out[18]:
0.9756