In [ ]:
# Import useful packages, load up & show the data
%load_ext autoreload
%autoreload 2
%matplotlib inline
import dlt
import numpy as np
import seaborn as sns
# UJI pen-characters data; `dlt.load_hdf5` is a project helper.
# NOTE(review): downstream code expects the returned datasets to expose
# .x (flattened images), .y (integer labels) and .vocab — confirm in dlt.
train = dlt.load_hdf5('/data/uji/train.hdf')
valid = dlt.load_hdf5('/data/uji/valid.hdf')
print(" Training: %s" % train)
print("Validation: %s" % valid)
train.show()
In [ ]:
import chainer as C
class Network(C.Chain):
    """Abstract base for the classifiers in this notebook.

    Subclasses implement ``__call__`` (forward pass returning unnormalised
    class scores); this base provides loss/accuracy for a batch,
    whole-dataset evaluation, single-image classification and a
    parameter count.
    """

    def run(self, batch_x, batch_y):
        """Forward one batch; return (softmax cross-entropy loss, accuracy in %)."""
        logits = self(C.Variable(batch_x))
        targets = C.Variable(batch_y)
        loss = C.functions.softmax_cross_entropy(logits, targets)
        accuracy = 100 * C.functions.accuracy(logits, targets)
        return loss, accuracy

    def evaluate(self, dataset):
        """Loss and accuracy over an entire dataset, as plain Python floats."""
        loss, accuracy = self.run(dataset.x, dataset.y)
        return float(loss.data), float(accuracy.data)

    def classify(self, img):
        """Predict the vocabulary label for a single image (flattened to 1 row)."""
        scores = self(C.Variable(img.reshape(1, -1))).data
        return train.vocab[np.argmax(scores)]

    def nparams(self):
        """Total number of learnable scalar parameters in the chain."""
        return sum(p.size for p in self.params())

    def __call__(self, x):
        # Forward pass — must be supplied by each concrete subclass.
        raise NotImplementedError
class ConvNetwork(Network):  # 0.718, 74.2%
    """3x3 convolution (8 maps) -> max over channels -> two tanh FC layers."""

    def __init__(self):
        hidden = 256
        super().__init__(
            a=C.links.Convolution2D(1, 8, 3, pad=1),
            b=C.links.Linear(16 * 16, hidden),
            c=C.links.Linear(hidden, hidden),
            final=C.links.Linear(hidden, len(train.vocab)),
        )

    def __call__(self, x):
        # Inputs arrive flattened; restore the 16x16 single-channel image.
        images = C.functions.reshape(x, (-1, 1, 16, 16))
        h = C.functions.tanh(self.a(images))
        # Maxout-style pooling across the 8 feature maps, then flatten.
        h = C.functions.reshape(C.functions.max(h, axis=1), (-1, 16 * 16))
        h = C.functions.tanh(self.b(h))
        h = C.functions.tanh(self.c(h))
        return self.final(h)
class DeepNetwork(Network):  # 512: 0.72, 74.8%
    """Two tanh hidden layers of equal width."""

    def __init__(self):
        width = 512
        super().__init__(
            a=C.links.Linear(train.x.shape[1], width),
            b=C.links.Linear(width, width),
            final=C.links.Linear(width, len(train.vocab)),
        )

    def __call__(self, x):
        h = C.functions.tanh(self.a(x))
        h = C.functions.tanh(self.b(h))
        return self.final(h)
class MaxoutNetwork(Network):  # 256x4: 0.676, 75.6%
    """Two maxout hidden layers (4 linear pieces each), each followed by tanh."""

    def __init__(self):
        width, pieces = 256, 4
        super().__init__(
            a=C.links.Maxout(train.x.shape[1], width, pieces),
            b=C.links.Maxout(width, width, pieces),
            final=C.links.Linear(width, len(train.vocab)),
        )

    def __call__(self, x):
        h = C.functions.tanh(self.a(x))
        h = C.functions.tanh(self.b(h))
        return self.final(h)
class ShallowNetwork(Network):  # 1024: 1.1, 68.2% (improving); 2048: 67.9% (improving)
    """Single tanh hidden layer."""

    def __init__(self):
        width = 2048
        super().__init__(
            a=C.links.Linear(train.x.shape[1], width),
            final=C.links.Linear(width, len(train.vocab)),
        )

    def __call__(self, x):
        return self.final(C.functions.tanh(self.a(x)))
class LinearNetwork(Network):  # 1.74, 56.0%
    """Softmax regression baseline: a single linear layer, no hidden units."""

    def __init__(self):
        super().__init__(
            final=C.links.Linear(train.x.shape[1], len(train.vocab)),
        )

    def __call__(self, x):
        return self.final(x)
# Train the chosen model: minibatch Adam for 4 passes over the training set,
# logging train metrics per batch and validation metrics alongside each one.
batch_size = 512
network = MaxoutNetwork()
opt = C.optimizers.Adam()
opt.use_cleargrads()
opt.setup(network)
#opt.add_hook(C.optimizer.WeightDecay(0.001))
log = dlt.Log()
for epoch in range(4):
    for start in range(0, len(train.x), batch_size):
        stop = start + batch_size
        network.cleargrads()
        loss, accuracy = network.run(train.x[start:stop], train.y[start:stop])
        loss.backward()
        opt.update()
        log.add('loss', 'train', loss)
        log.add('accuracy', 'train', accuracy)
        # Full validation pass after every batch (slow but gives smooth curves).
        loss, accuracy = network.run(valid.x, valid.y)
        log.add('loss', 'valid', loss)
        log.add('accuracy', 'valid', accuracy)
log.show()
print("Valid: %.3g (%.1f%%) in %.2gs" % (network.evaluate(valid) + (log.elapsed,)))
In [ ]:
# Interactive widget: classify user-drawn input with the trained network.
dlt.CustomInput(network.classify)
In [ ]:
# Final numbers: validation once more, then the held-out test set.
print("Valid: %.3g (%.1f%%)" % network.evaluate(valid))
print(" Test: %.3g (%.1f%%)" % network.evaluate(dlt.load_hdf5('/test/uji/test.hdf')))