Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements; and to You under the Apache License, Version 2.0.
Download the dataset file `mnist.pkl.gz` to your workspace (i.e., the notebook folder) before running the cells below.
In [1]:
# Every `from __future__` import has to come before any other statement in
# the cell; placing one after regular imports is rejected with a SyntaxError.
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import str
from builtins import range
from past.utils import old_div
from future import standard_library
from tqdm import tnrange, tqdm_notebook
standard_library.install_aliases()
import pickle, gzip

# Load the dataset from the gzip-compressed pickle in the notebook folder.
# The pickle unpacks into three items; the first two are kept as train_set
# and valid_set, the third is discarded.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open a copy of 'mnist.pkl.gz' obtained from a trusted source.
# The `with` block closes the file even if unpickling raises.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, _ = pickle.load(f, encoding='latin1')
In [2]:
# For each split, print the shape of its element 0 followed by the shape of
# its element 1 (training split first, then validation split).
for split in (train_set, valid_set):
    print(split[0].shape, split[1].shape)
In [3]:
import numpy as np

# Reshape every sample to (1, 28, 28).  The leading -1 lets NumPy infer the
# number of samples instead of hard-coding 50000 / 10000, so the cell also
# works on a subset of the data.
image_shape = (-1, 1, 28, 28)

train_x = np.reshape(train_set[0], image_shape).astype(np.float32, copy=False)
train_y = np.array(train_set[1]).astype(np.int32, copy=False)

# The validation split gets exactly the same treatment as the training split
# (same dtype for the images, labels extracted as int32).
valid_x = np.reshape(valid_set[0], image_shape).astype(np.float32, copy=False)
valid_y = np.array(valid_set[1]).astype(np.int32, copy=False)
In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(train_x[0][0])
Out[4]:
In [5]:
from singa import net as ffnet
from singa.layer import Conv2D, MaxPooling2D, Dropout, Activation, Flatten, Dense
from singa import optimizer, loss, metric
from singa import layer

# The engine is chosen before any layer object is constructed.
layer.engine = 'singacpp'

# Feed-forward network with softmax cross-entropy loss and accuracy metric.
net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())

# (layer class, positional args, keyword args) for every layer except the
# last one.  Each layer is constructed immediately before it is added, in
# the order listed here.
leading_layer_specs = [
    (Conv2D, ('conv1', 32, 3, 2), {'input_sample_shape': (1, 28, 28)}),
    (Activation, ('relu1',), {}),
    (Conv2D, ('conv2', 32, 3, 2), {}),
    (Activation, ('relu2',), {}),
    (MaxPooling2D, ('pool', 3, 2), {}),
    (Flatten, ('flat',), {}),
]
for layer_cls, layer_args, layer_kwargs in leading_layer_specs:
    net.add(layer_cls(*layer_args, **layer_kwargs))

# Kept as the final expression so the cell still displays what this last
# net.add call returns.
net.add(Dense('dense', 10))
Out[5]:
In [6]:
# Initialise every parameter of the network and report it:
#   * parameters with more than one dimension are drawn from a Gaussian
#     (arguments 0 and 0.1),
#   * all other parameters are set to 0.
for param_name, param in zip(net.param_names(), net.param_values()):
    multi_dimensional = len(param.shape) > 1
    if not multi_dimensional:
        param.set_value(0)
    else:
        param.gaussian(0, 0.1)
    print(param_name, param.shape, param.l1())
In [7]:
from singa import tensor
#from singa.proto import core_pb2
from singa import device
from singa import utils
# for progress bar
from tqdm import tnrange

cpu = device.get_default_device()
opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)

batch_size = 32
num_train_samples = train_x.shape[0]
# Integer division: only complete mini-batches are counted.
num_train_batch = num_train_samples // batch_size

# Input and label tensors sized for one mini-batch; the training loop
# refills them with copy_from_numpy on every step.
tx = tensor.Tensor((batch_size, 1, 28, 28))
ty = tensor.Tensor((batch_size,), cpu, tensor.int32)

# One index per training sample; shuffled later to randomise batch contents.
idx = np.arange(num_train_samples, dtype=np.int32)
In [8]:
# Train for two epochs of mini-batch SGD with a fixed learning rate of 0.01.
for epoch in range(2):
    # New random sample order for every epoch.
    np.random.shuffle(idx)
    # Running sums over the epoch.  They are deliberately NOT called
    # `loss`/`acc`: `loss` is the singa module imported when the network was
    # built, and rebinding it to a float would break any later use of it.
    epoch_loss, epoch_acc = 0.0, 0.0
    bar = tnrange(num_train_batch, desc='Epoch %d' % epoch)
    for b in bar:
        # Select the shuffled indices of mini-batch b once and reuse them for
        # both the images and the labels.
        batch_idx = idx[b * batch_size: (b + 1) * batch_size]
        x = train_x[batch_idx]
        y = train_y[batch_idx]
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        # net.train yields the gradients plus a (loss, accuracy) pair for
        # this mini-batch.
        grads, (l, a) = net.train(tx, ty)
        epoch_loss += l
        epoch_acc += a
        # Apply one optimizer update per parameter.
        for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
            opt.apply_with_lr(epoch, 0.01, g, p, str(s), b)
        # update progress bar
        bar.set_postfix(train_loss=l, train_accuracy=a)
    # Report the per-batch averages for the finished epoch.
    print('Epoch = %d, training loss = %f, training accuracy = %f' % (epoch, epoch_loss / num_train_batch, epoch_acc / num_train_batch))
In [9]:
# Save the network under the name 'checkpoint'; later cells pass the same
# name to net.load.
checkpoint_name = 'checkpoint'
net.save(checkpoint_name)
In [10]:
# Overwrite every parameter with 0 first, then load the saved checkpoint.
current_params = net.param_values()
for param in current_params:
    param.set_value(0)

net.load('checkpoint')
In [11]:
from PIL import Image

# Read the sample picture, convert it to 8-bit grayscale ('L') and resize it
# to the 28x28 resolution of the training images.
img = Image.open('static/digit.jpg').convert('L')
img = img.resize((28,28))

# Scale pixel values by 1/255 and shape the array as a one-sample batch
# (1, 1, 28, 28).  The reshape is done in NumPy *before* the tensor is
# created: the previous code called `img.reshape(...)` on the tensor and
# discarded the result, which only works when Tensor.reshape modifies the
# tensor in place.
# NOTE(review): Tensor.reshape appears to return a new tensor in some SINGA
# releases -- reshaping the NumPy array avoids depending on either behaviour.
img = (np.array(img, dtype=np.float32) / 255).reshape((1, 1, 28, 28))
img = tensor.from_numpy(img)

y = net.predict(img)
In [12]:
# Row 0 of the prediction output, plotted against the positions 0..9.
prediction = tensor.to_numpy(y)
prob = prediction[0]
class_positions = list(range(10))
plt.plot(class_positions, prob)
Out[12]:
In [13]:
np.random.shuffle(idx)
ffnet.verbose = True

# Re-initialise the parameters: Gaussian with arguments (0, 10) for those
# with more than one dimension, 0 for the rest.
for param_name, param in zip(net.param_names(), net.param_values()):
    if len(param.shape) <= 1:
        param.set_value(0)
    else:
        param.gaussian(0, 10)
    print(param_name, param.shape, param.l1())

# Run ten training steps, printing the loss and each parameter's l1() value.
for b in range(10):
    print("\n\nEpoch %d" % b)
    step_idx = idx[b * batch_size: (b + 1) * batch_size]
    tx.copy_from_numpy(train_x[step_idx])
    ty.copy_from_numpy(train_y[step_idx])
    grads, (l, a) = net.train(tx, ty)
    print('\n loss = %f, params' % l)
    for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
        # `epoch` is not assigned in this cell; it is the value left behind
        # by the `for epoch in range(2)` training loop earlier.
        opt.apply_with_lr(epoch, 0.01, g, p, str(s), b)
        print(s, p.l1())
In [14]:
def vis_square(data):
    """Display a stack of images tiled into one approximately square grid.

    Args:
        data: array of shape (n, height, width) or (n, height, width, 3).
            Each (height, width) slice becomes one tile in a grid of about
            sqrt(n) by sqrt(n) tiles.

    Returns:
        None.  The tiled image is drawn with ``plt.imshow`` and the axes are
        switched off.
    """
    # normalize data for display; a constant input (max == min) is mapped to
    # all zeros instead of dividing by zero and producing NaNs
    lowest = data.min()
    span = data.max() - lowest
    data = (data - lowest) / (span if span != 0 else 1)

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
                (0, 1), (0, 1))                  # add some space between filters
               + ((0, 0),) * (data.ndim - 3))    # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # tile the filters into an image: (n*n, h, w, ...) -> (n, h, n, w, ...)
    # -> (n*h, n*w, ...)
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data)
    plt.axis('off')
In [15]:
np.random.shuffle(idx)
ffnet.verbose = False

# Load the saved checkpoint before running the forward pass.
net.load('checkpoint')

# Use mini-batch number 1 of the freshly shuffled index.
b = 1
batch_start, batch_stop = b * batch_size, (b + 1) * batch_size
x = train_x[idx[batch_start:batch_stop]]
tx.copy_from_numpy(x)

# Forward pass requesting the outputs of layers 'relu1' and 'relu2'; the
# result is looked up by layer name in the following cells.
r = net.forward(False, tx, ['relu1', 'relu2'])
In [16]:
# Output of layer 'relu1' for the first sample of the batch, shown as a grid.
relu1_output = tensor.to_numpy(r['relu1'])
r1 = relu1_output[0]
vis_square(r1)
In [17]:
# Output of layer 'relu2' for the first sample of the batch, shown as a grid.
relu2_output = tensor.to_numpy(r['relu2'])
r2 = relu2_output[0]
vis_square(r2)
In [18]:
# Pick the parameter at position 2 of the network's parameter list and
# print its shape.
all_params = net.param_values()
p = all_params[2]
print(p.shape)
In [19]:
# Take row 0 of parameter p, view its values as 32 tiles of 3x3, and show
# them as a grid.
first_row = tensor.to_numpy(p)[0]
kernel_tiles = first_row.reshape(32, 3, 3)
vis_square(kernel_tiles)
In [ ]: