In [40]:
# This is the TensorFlow version
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
mnist = input_data.read_data_sets('data/MNIST_data', one_hot=False)
X_train, y_train = mnist.train.images, mnist.train.labels
X_val, y_val = mnist.validation.images, mnist.validation.labels
X_test, y_test = mnist.test.images, mnist.test.labels
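With one_hot=False the loader returns integer class ids in [0, 9] rather than one-hot rows; the class-count computation y_train.max() + 1 in a later cell relies on this. A quick check:

print(y_train.min(), y_train.max())  # 0 9 -- integer labels, not one-hot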
In [41]:
X_test.shape, X_train.shape, X_val.shape
Out[41]:
((10000, 784), (55000, 784), (5000, 784))
In [42]:
M, D, C = X_train.shape[0], X_train.shape[1], y_train.max() + 1  # samples, input dim, num classes
In [43]:
def prepro(X_train, X_val, X_test):
    mean = np.mean(X_train)
    return X_train - mean, X_val - mean, X_test - mean
In [44]:
X_train, X_val, X_test = prepro(X_train, X_val, X_test)
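prepro centers all three splits with the scalar mean of the training pixels only, so no statistics leak from the validation or test sets. A minimal sketch of the same arithmetic on toy data (plain numpy, nothing from the notebook assumed):

toy_train = np.array([[0.0, 0.5], [1.0, 0.5]])
toy_test = np.array([[0.25, 0.75]])
mean = np.mean(toy_train)    # scalar mean over all training pixels: 0.5
print(toy_train - mean)      # [[-0.5  0. ] [ 0.5  0. ]]
print(toy_test - mean)       # [[-0.25  0.25]] -- centered with the *train* mean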
In [54]:
# if net_type == 'cnn':
img_shape = (1, 28, 28)
X_train = X_train.reshape(-1, *img_shape)
X_val = X_val.reshape(-1, *img_shape)
X_test = X_test.reshape(-1, *img_shape)
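The reshape turns each flat 784-pixel row into a single-channel 28x28 image in (N, channels, height, width) order, which matches the (D, 1, 3, 3) filter shape used for W1 below. A quick sanity check:

assert X_train.shape[1:] == (1, 28, 28)
assert X_train.reshape(X_train.shape[0], -1).shape[1] == 784  # 1 * 28 * 28 round-trips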
In [55]:
import hipsternet.loss as loss_fun
import hipsternet.layer as l
import hipsternet.regularization as reg
import hipsternet.utils as util
class NeuralNet(object):

    loss_funs = dict(
        cross_ent=loss_fun.cross_entropy,
        hinge=loss_fun.hinge_loss,
        squared=loss_fun.squared_loss,
        l2_regression=loss_fun.l2_regression,
        l1_regression=loss_fun.l1_regression
    )

    dloss_funs = dict(
        cross_ent=loss_fun.dcross_entropy,
        hinge=loss_fun.dhinge_loss,
        squared=loss_fun.dsquared_loss,
        l2_regression=loss_fun.dl2_regression,
        l1_regression=loss_fun.dl1_regression
    )

    forward_nonlins = dict(
        relu=l.relu_forward,
        lrelu=l.lrelu_forward,
        sigmoid=l.sigmoid_forward,
        tanh=l.tanh_forward
    )

    backward_nonlins = dict(
        relu=l.relu_backward,
        lrelu=l.lrelu_backward,
        sigmoid=l.sigmoid_backward,
        tanh=l.tanh_backward
    )

    def __init__(self, D, C, H, lam=1e-3, p_dropout=.8, loss='cross_ent', nonlin='relu'):
        if loss not in NeuralNet.loss_funs.keys():
            raise Exception('Loss function must be in {}!'.format(NeuralNet.loss_funs.keys()))

        if nonlin not in NeuralNet.forward_nonlins.keys():
            raise Exception('Nonlinearity must be in {}!'.format(NeuralNet.forward_nonlins.keys()))

        self._init_model(D, C, H)

        self.lam = lam
        self.p_dropout = p_dropout
        self.loss = loss
        self.forward_nonlin = NeuralNet.forward_nonlins[nonlin]
        self.backward_nonlin = NeuralNet.backward_nonlins[nonlin]

        self.mode = 'classification'
        if 'regression' in loss:
            self.mode = 'regression'

    def train_step(self, X_train, y_train):
        """
        Single training step over minibatch: forward, loss, backprop
        """
        y_pred, cache = self.forward(X_train, train=True)
        loss = self.loss_funs[self.loss](self.model, y_pred, y_train, self.lam)
        grad = self.backward(y_pred, y_train, cache)
        return grad, loss

    def predict_proba(self, X):
        score, _ = self.forward(X, False)
        return util.softmax(score)

    def predict(self, X):
        if self.mode == 'classification':
            return np.argmax(self.predict_proba(X), axis=1)
        else:
            score, _ = self.forward(X, False)
            y_pred = np.round(score)
            return y_pred

    def forward(self, X, train=False):
        raise NotImplementedError()

    def backward(self, y_pred, y_train, cache):
        raise NotImplementedError()

    def _init_model(self, D, C, H):
        raise NotImplementedError()
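predict_proba defers to util.softmax. For reference, here is a minimal, numerically stable softmax sketch; this is an assumption about what util.softmax computes, not hipsternet's actual source:

def softmax_sketch(scores):
    # Shift by the row max so exp() cannot overflow; softmax is invariant
    # to adding a per-row constant, so the probabilities are unchanged.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)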
In [56]:
class ConvNet(NeuralNet):

    def __init__(self, D, C, H, lam=1e-3, p_dropout=.8, loss='cross_ent', nonlin='relu'):
        super().__init__(D, C, H, lam, p_dropout, loss, nonlin)

    def forward(self, X, train=False):
        # Conv-1
        h1, h1_cache = l.conv_forward(X, self.model['W1'], self.model['b1'])
        h1, nl_cache1 = l.relu_forward(h1)

        # Pool-1
        hpool, hpool_cache = l.maxpool_forward(h1)
        h2 = hpool.reshape(X.shape[0], -1)  # flatten to (N, D * 14 * 14)

        # FC-7
        h3, h3_cache = l.fc_forward(h2, self.model['W2'], self.model['b2'])
        h3, nl_cache3 = l.relu_forward(h3)

        # Softmax
        score, score_cache = l.fc_forward(h3, self.model['W3'], self.model['b3'])

        return score, (X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3)

    def backward(self, y_pred, y_train, cache):
        X, h1_cache, h3_cache, score_cache, hpool_cache, hpool, nl_cache1, nl_cache3 = cache

        # Output layer
        grad_y = self.dloss_funs[self.loss](y_pred, y_train)

        # FC-7
        dh3, dW3, db3 = l.fc_backward(grad_y, score_cache)
        dh3 = self.backward_nonlin(dh3, nl_cache3)

        dh2, dW2, db2 = l.fc_backward(dh3, h3_cache)
        dh2 = dh2.reshape(hpool.shape)  # un-flatten back to the pooled volume

        # Pool-1
        dpool = l.maxpool_backward(dh2, hpool_cache)

        # Conv-1
        dh1 = self.backward_nonlin(dpool, nl_cache1)
        dX, dW1, db1 = l.conv_backward(dh1, h1_cache)

        grad = dict(
            W1=dW1, W2=dW2, W3=dW3, b1=db1, b2=db2, b3=db3
        )

        return grad

    def _init_model(self, D, C, H):
        # He-style initialization; here D is the number of conv filters, not the input dim.
        self.model = dict(
            W1=np.random.randn(D, 1, 3, 3) / np.sqrt(D / 2.),
            W2=np.random.randn(D * 14 * 14, H) / np.sqrt(D * 14 * 14 / 2.),
            W3=np.random.randn(H, C) / np.sqrt(H / 2.),
            b1=np.zeros((D, 1)),
            b2=np.zeros((1, H)),
            b3=np.zeros((1, C))
        )
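W2's input dimension D * 14 * 14 encodes the conv/pool geometry: assuming conv_forward pads to keep the 28x28 spatial size and maxpool_forward uses a 2x2 window with stride 2 (consistent with these weight shapes, though not verified against hipsternet's source), a D-filter conv on MNIST flattens to D * 14 * 14 features:

n_filters, side = 10, 28            # D filters on 28x28 MNIST images
pooled = side // 2                  # 2x2 max pool with stride 2 halves each side
print(n_filters * pooled * pooled)  # 1960 == 10 * 14 * 14, W2's input size for D=10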
In [57]:
# Hyperparameters
n_iter = 1000
alpha = 1e-3
mb_size = 64
n_experiment = 1
reg = 1e-5  # note: shadows the hipsternet.regularization import above; only the commented-out FF net uses it
print_after = 100
p_dropout = 0.8
loss = 'cross_ent'
nonlin = 'relu'
solver = 'sgd'
In [58]:
# from hipsternet.solver import *
# import numpy as np
# import hipsternet.utils as util
# import hipsternet.constant as c
eps = 1e-8
import copy
from sklearn.utils import shuffle as skshuffle
def get_minibatch(X, y, minibatch_size, shuffle=True):
    minibatches = []

    if shuffle:
        X, y = skshuffle(X, y)

    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]

        minibatches.append((X_mini, y_mini))

    return minibatches
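get_minibatch slices the data into consecutive chunks, so the last batch may be smaller than minibatch_size. For example:

demo = get_minibatch(np.arange(10).reshape(5, 2), np.arange(5), 2, shuffle=False)
print([xb.shape[0] for xb, yb in demo])  # [2, 2, 1]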
def sgd(nn, X_train, y_train, val_set=None, alpha=1e-3, mb_size=256, n_iter=2000, print_after=100):
    minibatches = get_minibatch(X_train, y_train, mb_size)

    if val_set:
        X_val, y_val = val_set

    for it in range(1, n_iter + 1):
        # Sample a random minibatch each step rather than sweeping epochs
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        grad, loss = nn.train_step(X_mini, y_mini)

        if it % print_after == 0:
            if val_set:
                val_acc = util.accuracy(y_val, nn.predict(X_val))
                print('Iter-{} loss: {:.4f} validation: {:.4f}'.format(it, loss, val_acc))
            else:
                print('Iter-{} loss: {:.4f}'.format(it, loss))

        for layer in grad:
            nn.model[layer] -= alpha * grad[layer]

    return nn
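The update inside sgd is plain gradient descent, theta <- theta - alpha * grad, applied to a randomly sampled minibatch each iteration (so n_iter counts steps, not epochs). The same update rule on a toy quadratic:

w = 0.0
for _ in range(1000):
    w -= 1e-2 * 2 * (w - 3)  # df/dw for f(w) = (w - 3)**2
print(round(w, 3))           # ~3.0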
In [59]:
solvers = dict(sgd=sgd)
solver_fun = solvers[solver]
accs = np.zeros(n_experiment)
print()
print('Experimenting on {}'.format(solver))
print()
In [60]:
for k in range(n_experiment):
    print('Experiment-{}'.format(k + 1))

    # Reset model
    # if net_type == 'ff':
    #     net = nn.FeedForwardNet(D, C, H=128, lam=reg, p_dropout=p_dropout, loss=loss, nonlin=nonlin)
    # elif net_type == 'cnn':
    net = ConvNet(10, C, H=128)  # first arg is the number of conv filters (the D of _init_model)

    net = solver_fun(
        net, X_train, y_train, val_set=(X_val, y_val), mb_size=mb_size, alpha=alpha,
        n_iter=n_iter, print_after=print_after
    )

    y_pred = net.predict(X_test)
    accs[k] = np.mean(y_pred == y_test)
print()
print('Mean accuracy: {:.4f}, std: {:.4f}'.format(accs.mean(), accs.std()))