Deep convolutional neural networks


In [1]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from tqdm import tqdm_notebook as tqdm
mx.random.seed(1)

Context


In [2]:
ctx = mx.cpu()
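
To run on a GPU instead, point the context at one. A minimal sketch, assuming MXNet was built with CUDA support and a recent release where mx.context.num_gpus() is available:

In [ ]:
# Hypothetical alternative: use the first GPU when one is present.
ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()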

Load CIFAR-10


In [3]:
batch_size = 64

In [4]:
def transformer(data, label):
    # Upsample the 32x32 CIFAR-10 image to AlexNet's 224x224 input size.
    data = mx.image.imresize(data, 224, 224)
    # Reorder from HWC to the CHW layout that Conv2D expects.
    data = mx.nd.transpose(data, (2,0,1))
    data = data.astype(np.float32)
    return data, label
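
A quick sanity check on a single image (a sketch; assumes CIFAR-10 has already been downloaded to ./data): a raw 32x32x3 uint8 image should come out as a float32 tensor of shape (3, 224, 224).

In [ ]:
raw, lbl = gluon.data.vision.CIFAR10('./data', train=True)[0]
img, _ = transformer(raw, lbl)
print(img.shape, img.dtype)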

In [5]:
train_data = gluon.data.DataLoader(dataset=gluon.data.vision.CIFAR10('./data', train=True, transform=transformer),
                                   batch_size=batch_size,
                                   shuffle=True,
                                   last_batch='discard')

test_data = gluon.data.DataLoader(dataset=gluon.data.vision.CIFAR10('./data', train=False, transform=transformer),
                                  batch_size=batch_size,
                                  shuffle=False,
                                  last_batch='discard')
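
Note that last_batch='discard' drops the final incomplete batch, so every batch has exactly 64 examples; evaluation below therefore runs over 156 full batches, i.e. 9,984 of the 10,000 test images.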

In [6]:
# Grab a single batch to inspect its shape.
for d, l in train_data:
    break

In [7]:
print(d.shape, l.shape)


(64, 3, 224, 224) (64,)

In [8]:
d.dtype


Out[8]:
numpy.float32

AlexNet


In [9]:
alex_net = gluon.nn.Sequential()
with alex_net.name_scope():
    # First convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=96,
                                 kernel_size=11,
                                 strides=4,
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=2))
    # Second convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=192,
                                 kernel_size=5,
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=2))
    # Third convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384,
                                 kernel_size=3,
                                 activation='relu'))
    # Fourth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384,
                                 kernel_size=3,
                                 activation='relu'))
    # Fifth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=256,
                                 kernel_size=3,
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=2))
    # Flatten and apply fully connected layers
    alex_net.add(gluon.nn.Flatten())
    alex_net.add(gluon.nn.Dense(units=4096,
                                activation="relu"))
    alex_net.add(gluon.nn.Dense(units=4096,
                                activation="relu"))
    alex_net.add(gluon.nn.Dense(units=10))
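
Unlike the original AlexNet, none of the convolutions here use padding; with 224x224 inputs the feature map nevertheless shrinks to 1x1x256 just before Flatten, so the first dense layer sees 256 input features.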

Initialize parameters


In [10]:
alex_net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
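
A minimal smoke test (a sketch): push one dummy batch through the freshly initialized network to confirm it produces one score per class.

In [ ]:
x = mx.nd.random.uniform(shape=(1, 3, 224, 224), ctx=ctx)
print(alex_net(x).shape)  # expect (1, 10)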

Optimizer


In [11]:
trainer = gluon.Trainer(params=alex_net.collect_params(),
                        optimizer='sgd',
                        optimizer_params={'learning_rate': .001})
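
Should you want to decay the learning rate between epochs, recent MXNet versions expose a setter on the Trainer; a hedged sketch:

In [ ]:
# Assumes Trainer.set_learning_rate is available (recent MXNet releases).
trainer.set_learning_rate(trainer.learning_rate * 0.1)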

Softmax cross-entropy loss


In [12]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
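
A toy illustration of what this loss consumes: unnormalized scores (logits) and integer class labels, returning one loss value per example. Small values here mean confident, correct predictions.

In [ ]:
logits = mx.nd.array([[5.0, 0.0, 0.0], [0.0, 0.0, 5.0]])
labels = mx.nd.array([0, 2])
print(softmax_cross_entropy(logits, labels))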

Evaluation


In [13]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for d, l in data_iterator:
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        output = net(data)
        # The predicted class is the index of the largest score.
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    # acc.get() returns a (name, value) pair; keep the value.
    return acc.get()[1]
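
As a sanity check (a sketch, and slow on CPU): before any training, accuracy should sit near chance, i.e. roughly 0.10 for CIFAR-10's ten classes.

In [ ]:
print(evaluate_accuracy(test_data, alex_net))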

Training


In [14]:
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    for i, (d, l) in enumerate(tqdm(train_data)):
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        # Record the forward pass so autograd can compute gradients.
        with mx.autograd.record():
            output = alex_net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Update parameters; the step is normalized by the batch size.
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, alex_net)
    train_accuracy = evaluate_accuracy(train_data, alex_net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))


Epoch 0. Loss: 1.8251213145083127, Train_acc 0.29121318822023046, Test_acc 0.29146634615384615
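
The moving average tracked in the inner loop is an exponential moving average of the per-batch loss,

$$\text{moving\_loss}_i = (1 - \kappa)\,\text{moving\_loss}_{i-1} + \kappa\,\ell_i, \qquad \kappa = 0.01,$$

which weights recent batches most heavily and has an effective window of roughly 1/κ = 100 batches.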