In [1]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import gluon
In [2]:
# Set the random seed for reproducibility
mx.random.seed(1)
In [3]:
# Set context
data_ctx = mx.cpu()
model_ctx = mx.cpu()
In [4]:
# Scale pixel values from the range 0-255 down to 0-1
def transform(data, label):
    return data.astype(np.float32) / 255, label.astype(np.float32)
In [5]:
mnist_train = gluon.data.vision.MNIST(train=True,
                                      transform=transform)
mnist_test = gluon.data.vision.MNIST(train=False,
                                     transform=transform)
In [6]:
image, label = mnist_train[0]
In [7]:
image.shape
Out[7]:
In [8]:
label
Out[8]:
In [9]:
# Tile the single channel three times so matplotlib can render it as an RGB image
im = mx.nd.tile(image, (1, 1, 3))
print(im.shape)
In [10]:
import matplotlib.pyplot as plt
plt.imshow(im.asnumpy())
plt.show()
In [11]:
num_inputs = 784
num_outputs = 10
num_examples = 60000
In [12]:
batch_size = 64
train_data = gluon.data.DataLoader(dataset=mnist_train,
                                   batch_size=batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(dataset=mnist_test,
                                  batch_size=batch_size,
                                  shuffle=False)
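As a quick sanity check (a minimal sketch, assuming the transform above and batch_size = 64), pulling a single batch from the loader should show images of shape (64, 28, 28, 1) and labels of shape (64,):

for data, label in train_data:
    print(data.shape, label.shape)
    break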
In [13]:
W = mx.nd.random_normal(shape=(num_inputs, num_outputs),
                        ctx=model_ctx)
b = mx.nd.random_normal(shape=num_outputs,
                        ctx=model_ctx)
params = [W, b]
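Note that random_normal draws from a standard normal by default, which gives fairly large initial logits with 784 inputs. Shrinking the initial weights, e.g. with scale=.01, is a common variant (an assumption on my part, not part of the original recipe):

W = mx.nd.random_normal(shape=(num_inputs, num_outputs), scale=.01, ctx=model_ctx)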
In [14]:
# Allocate gradient buffers so autograd can record gradients for each parameter
for param in params:
    param.attach_grad()
In [15]:
def softmax(y_linear):
    # Subtract the per-row maximum before exponentiating for numerical stability
    exp = mx.nd.exp(y_linear - mx.nd.max(y_linear, axis=1).reshape((-1, 1)))
    # Normalize each row so the entries sum to one
    norms = mx.nd.sum(exp, axis=1).reshape((-1, 1))
    return exp / norms
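In formula form, for a row of logits z this computes softmax(z)_j = exp(z_j - max_k z_k) / sum_i exp(z_i - max_k z_k); subtracting the row maximum leaves the result unchanged but keeps the exponentials from overflowing.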
In [16]:
sample_y_linear = mx.nd.random_normal(shape=(1, 3))
sample_yhat = softmax(sample_y_linear)
print(sample_y_linear)
print(sample_yhat)
In [17]:
print(mx.nd.sum(sample_yhat, axis=1))
In [18]:
def net(X):
    # Linear transformation followed by softmax gives per-class probabilities
    y_linear = mx.nd.dot(X, W) + b
    yhat = softmax(y_linear)
    return yhat
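As a quick shape check (a sketch; the dummy input is made up), a batch of flattened images should map to one row of 10 class probabilities per example:

dummy = mx.nd.ones((2, num_inputs), ctx=model_ctx)
print(net(dummy).shape)  # expected: (2, 10)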
In [19]:
def cross_entropy(yhat, y):
    # Add a small constant inside the log to avoid log(0)
    return -mx.nd.sum(y * mx.nd.log(yhat + 1e-6))
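For a one-hot label y and predicted distribution yhat, this is -sum_j y_j * log(yhat_j + 1e-6), summed over the whole batch; summing over the batch (rather than per example) is fine here because the training loop below only uses the total.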
In [20]:
def SGD(params, lr):
    # Update each parameter in place with a plain gradient-descent step
    for param in params:
        param[:] = param - lr * param.grad
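Each call applies one step of plain stochastic gradient descent, param <- param - lr * grad; the slice assignment param[:] writes in place, so the arrays keep the gradient buffers attached earlier.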
In [21]:
def evaluate_accuracy(data_iterator, net):
    # Numerator stores the number of correct predictions
    numerator = 0.
    # Denominator stores the total number of samples
    denominator = 0.
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        label_one_hot = mx.nd.one_hot(label, 10)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        numerator += mx.nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()
In [22]:
evaluate_accuracy(test_data, net)
Out[22]:
In [23]:
epochs = 5
learning_rate = .005
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        label_one_hot = mx.nd.one_hot(label, 10)
        with mx.autograd.record():
            output = net(data)
            loss = cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)
        cumulative_loss += mx.nd.sum(loss).asscalar()
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss / num_examples, train_accuracy, test_accuracy))
In [24]:
# Define the prediction function: return the most likely class for each example
def model_predict(net, data):
    output = net(data)
    return mx.nd.argmax(output, axis=1)
In [25]:
# Let's sample 10 random data points from the test set
sample_data = gluon.data.DataLoader(mnist_test, 10, shuffle=True)
In [26]:
for i, (data, label) in enumerate(sample_data):
    data = data.as_in_context(model_ctx)
    print(data.shape)
    # Stitch the 10 images into one 28 x 280 strip and tile to 3 channels for display
    im = mx.nd.transpose(data, (1, 0, 2, 3))
    im = mx.nd.reshape(im, (28, 10 * 28, 1))
    imtiles = mx.nd.tile(im, (1, 1, 3))
    plt.imshow(imtiles.asnumpy())
    plt.show()
    pred = model_predict(net, data.reshape((-1, 784)))
    print('model predictions are:', pred)
    break