Multiclass logistic regression with gluon


In [1]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import gluon
from tqdm import tqdm

Context


In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
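
Everything below runs on the CPU. If a GPU is available, only model_ctx needs to change; a hedged, optional alternative (mx.context.num_gpus() is assumed to exist, as in reasonably recent MXNet versions):

model_ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()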

MNIST Dataset


In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
num_examples = 60000

In [4]:
def transform(data, label):
    return data.astype(np.float32) / 255, label.astype(np.float32)
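
A quick optional sanity check, not one of the original cells: indexing the transformed dataset directly should return float32 images scaled into [0, 1] (the names image and lbl below are just illustrative).

image, lbl = gluon.data.vision.MNIST(train=True, transform=transform)[0]
print(image.shape, image.dtype, image.max().asscalar())   # expect (28, 28, 1), float32, values in [0, 1]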

In [5]:
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, 
                                                           transform=transform),
                                   batch_size, shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, 
                                                          transform=transform),
                                  batch_size, shuffle=False)
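
As another hedged aside, pulling a single batch from the DataLoader confirms what each iteration yields: a batch of 64 images of shape (28, 28, 1) plus their labels.

for data, label in train_data:
    print(data.shape, label.shape)   # expect (64, 28, 28, 1) and (64,)
    break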

Multiclass Logistic Regression


In [6]:
net = gluon.nn.Dense(num_outputs)
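
This single Dense layer is the entire model: for each flattened image it produces one raw score per class via the affine map output = dot(x, W.T) + b. Below is a minimal hedged sketch of that computation on a throwaway layer and a toy batch (layer, x, scores and manual are illustrative names, not part of the notebook), so the deferred-initialized net above is left untouched.

layer = gluon.nn.Dense(num_outputs)
layer.initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
x = mx.nd.random.uniform(shape=(4, num_inputs), ctx=model_ctx)    # toy batch of 4 flattened "images"
scores = layer(x)                                                 # shape (4, 10): one raw score per class
manual = mx.nd.dot(x, layer.weight.data().T) + layer.bias.data()  # the same affine map, written out
print(mx.nd.abs(scores - manual).max())                           # ~0: identical computation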

Parameter Initialization


In [7]:
net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
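
Note that this only registers the initializer: because the Dense layer was created without an input dimension, the weights are allocated lazily, on the first forward pass. A hedged, optional way to observe this:

print(net.collect_params())   # the weight's input dimension should still show as unknown here (deferred init)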

Softmax Cross-Entropy Loss


In [8]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
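
SoftmaxCrossEntropyLoss fuses the softmax with the negative log-likelihood of the true class, so the network can output raw, unnormalized scores. A small hedged check against a manual computation on a toy score vector (scores, label and log_probs are illustrative names):

scores = mx.nd.array([[2.0, 0.5, -1.0]])             # raw scores for 3 hypothetical classes
label = mx.nd.array([0])                             # the true class is class 0
log_probs = mx.nd.log_softmax(scores)                # log of the softmax probabilities
manual = -mx.nd.pick(log_probs, label)               # negative log-likelihood of the true class
print(softmax_cross_entropy(scores, label), manual)  # both should be ~0.24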

Optimizer


In [9]:
trainer = gluon.Trainer(params=net.collect_params(), 
                        optimizer='sgd', 
                        optimizer_params={'learning_rate': 0.1})
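
With plain 'sgd', each call to trainer.step(batch_size) in the training loop below rescales the accumulated gradient by 1/batch_size and applies p <- p - learning_rate * grad / batch_size to every parameter p. A tiny hedged sketch of that update on a throwaway parameter (w and toy_trainer are illustrative names only):

w = gluon.Parameter('w', shape=(1,))
w.initialize(mx.init.Constant(1.0), ctx=model_ctx)
toy_trainer = gluon.Trainer({'w': w}, 'sgd', {'learning_rate': 0.1})
with mx.autograd.record():
    loss = (w.data() * 2.0).sum()     # d(loss)/dw = 2
loss.backward()
toy_trainer.step(batch_size=1)        # w <- w - 0.1 * (2 / 1)
print(w.data())                       # expect [0.8]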

Evaluation Function


In [10]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))   # flatten each image to a 784-vector
        label = label.as_in_context(model_ctx)
        output = net(data)                                         # raw scores, one per class
        predictions = mx.nd.argmax(output, axis=1)                 # most likely class per example
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
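
The helper relies on mx.metric.Accuracy, which simply accumulates, over all batches passed to update(), how many argmax predictions match the labels. A quick hedged example with made-up values (acc here is a throwaway instance):

acc = mx.metric.Accuracy()
acc.update(preds=mx.nd.array([1, 2, 3]), labels=mx.nd.array([1, 0, 3]))
print(acc.get())   # ('accuracy', 0.666...): 2 of the 3 toy predictions are correct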

Accuracy of the randomly initialized network


In [11]:
evaluate_accuracy(test_data, net)


Out[11]:
0.0774

Training


In [12]:
# Hyperparameters
epochs = 10
moving_loss = 0.   # not actually used in the loop below

In [13]:
for e in tqdm(range(epochs)):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))   # flatten each image to a 784-vector
        label = label.as_in_context(model_ctx)
        with mx.autograd.record():                                 # record the forward pass for autograd
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()                                            # backpropagate through the recorded graph
        trainer.step(batch_size)                                   # SGD update; gradient rescaled by 1/batch_size
        cumulative_loss += mx.nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc: %s, Test_acc: %s" % (e,
                                                               cumulative_loss / num_examples,
                                                               train_accuracy,
                                                               test_accuracy))


Epoch 0. Loss: 2.141845157019297, Train_acc: 0.7916833333333333, Test_acc: 0.8025
Epoch 1. Loss: 0.9146137731234233, Train_acc: 0.8338166666666667, Test_acc: 0.843
Epoch 2. Loss: 0.7461259582916896, Train_acc: 0.85135, Test_acc: 0.8576
Epoch 3. Loss: 0.6599271810730298, Train_acc: 0.86375, Test_acc: 0.8691
Epoch 4. Loss: 0.6039995469371477, Train_acc: 0.8703666666666666, Test_acc: 0.876
Epoch 5. Loss: 0.5632936976909637, Train_acc: 0.8758, Test_acc: 0.8809
Epoch 6. Loss: 0.5326414702574412, Train_acc: 0.88, Test_acc: 0.8851
Epoch 7. Loss: 0.5081972484668096, Train_acc: 0.8840166666666667, Test_acc: 0.8865
Epoch 8. Loss: 0.48769780044555666, Train_acc: 0.8867833333333334, Test_acc: 0.8878
Epoch 9. Loss: 0.4706066422462463, Train_acc: 0.8899166666666667, Test_acc: 0.8923

Visualize predictions


In [14]:
import matplotlib.pyplot as plt

In [15]:
# Prediction function
def model_predict(net, data):
    output = net(data.as_in_context(model_ctx))
    return mx.nd.argmax(output, axis=1)

In [16]:
# let's sample 10 random data points from the test set
sample_data = mx.gluon.data.DataLoader(dataset=mx.gluon.data.vision.MNIST(train=False, 
                                                                          transform=transform),
                                       batch_size=10, shuffle=True)

In [17]:
for i, (data, label) in enumerate(sample_data):
    data = data.as_in_context(model_ctx)
    print(data.shape)
    # Tile the 10 sampled images side by side into a single 28 x 280 image.
    im = mx.nd.transpose(data, (1, 0, 2, 3))
    im = mx.nd.reshape(im, (28, 10 * 28, 1))
    imtiles = mx.nd.tile(im, (1, 1, 3))   # repeat the grayscale channel 3 times for RGB display

    plt.imshow(imtiles.asnumpy())
    plt.show()
    pred = model_predict(net, data.reshape((-1, 784)))
    print('model predictions are:', pred)
    break


(10, 28, 28, 1)
model predictions are: 
[9. 5. 1. 7. 8. 8. 8. 1. 2. 1.]
<NDArray 10 @cpu(0)>