In [ ]:
# Import useful packages, load up & show the data

%load_ext autoreload
%autoreload 2

%matplotlib inline
import dlt
import numpy as np
import seaborn as sns  # for nicer default plot styling

train = dlt.load_hdf5('/data/uji/train.hdf')
valid = dlt.load_hdf5('/data/uji/valid.hdf')
print("  Training: %s" % train)
print("Validation: %s" % valid)
train.show()
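
In [ ]:
# A quick sanity check of the dataset layout assumed by the code below:
# train.x as flattened 16x16 images, train.y as integer labels indexing
# train.vocab. (A sketch; these attribute names come from their use later.)
print(train.x.shape, train.x.dtype)  # expected: (N, 256)
print(train.y.shape, train.y.dtype)  # expected: (N,)
print(len(train.vocab), "classes")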

In [ ]:
import chainer as C

class Network(C.Chain):
    def run(self, batch_x, batch_y):
        # Forward pass over one batch: returns the softmax cross-entropy loss
        # and the accuracy (as a percentage), both as Chainer Variables so
        # that loss.backward() can be used for training.
        batch_x = C.Variable(batch_x)
        batch_y = C.Variable(batch_y)
        z = self(batch_x)
        loss = C.functions.softmax_cross_entropy(z, batch_y)
        accuracy = 100 * C.functions.accuracy(z, batch_y)
        return loss, accuracy

    def evaluate(self, dataset):
        # Run on a whole dataset at once and return plain Python floats.
        loss, accuracy = self.run(dataset.x, dataset.y)
        return float(loss.data), float(accuracy.data)

    def classify(self, img):
        # Classify a single image: flatten it to a (1, D) row, then map the
        # argmax logit back through the training vocabulary.
        z = self(C.Variable(img.reshape(1, -1)))
        return train.vocab[int(np.argmax(z.data))]

    def nparams(self):
        # Total number of trainable parameters in the network.
        return sum(param.size for param in self.params())

    def __call__(self, x):
        # Subclasses implement the forward computation, returning logits.
        raise NotImplementedError


class ConvNetwork(Network): # valid loss 0.718, accuracy 74.2%
    def __init__(self):
        H = 256
        super().__init__(
            a=C.links.Convolution2D(1, 8, 3, pad=1),
            b=C.links.Linear(16*16, H),
            c=C.links.Linear(H, H),
            final=C.links.Linear(H, len(train.vocab)),
        )
    def __call__(self, x):
        # (B, 256) -> (B, 1, 16, 16), then convolve to (B, 8, 16, 16)
        h = C.functions.tanh(self.a(C.functions.reshape(x, (-1, 1, 16, 16))))
        # max over the 8 feature maps, then flatten back to (B, 256)
        h = C.functions.reshape(C.functions.max(h, axis=1), (-1, 16*16))
        h = C.functions.tanh(self.b(h))
        h = C.functions.tanh(self.c(h))
        return self.final(h)

class DeepNetwork(Network): # H=512: valid loss 0.72, accuracy 74.8%
    def __init__(self):
        H = 512
        super().__init__(
            a=C.links.Linear(train.x.shape[1], H),
            b=C.links.Linear(H, H),
            final=C.links.Linear(H, len(train.vocab)),
        )
    def __call__(self, x):
        return self.final(C.functions.tanh(self.b(C.functions.tanh(self.a(x)))))

class MaxoutNetwork(Network): # H=256, N=4: valid loss 0.676, accuracy 75.6%
    def __init__(self):
        H = 256
        N = 4
        super().__init__(
            a=C.links.Maxout(train.x.shape[1], H, N),
            b=C.links.Maxout(H, H, N),
            final=C.links.Linear(H, len(train.vocab)),
        )
    def __call__(self, x):
        return self.final(C.functions.tanh(self.b(C.functions.tanh(self.a(x)))))
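
# For reference: C.links.Maxout(in_size, out_size, pool_size) computes, for
# each of the out_size units, the maximum over pool_size independent linear
# projections of the input. A rough sketch of one such layer (hypothetical
# names, not part of the code above):
#
#     linear = C.links.Linear(in_size, out_size * pool_size)
#     def maxout_layer(x):
#         return C.functions.maxout(linear(x), pool_size)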

class ShallowNetwork(Network): # H=1024: valid loss 1.1, accuracy 68.2% (still improving); H=2048: 67.9% (still improving)
    def __init__(self):
        H = 2048
        super().__init__(
            a=C.links.Linear(train.x.shape[1], H),
            final=C.links.Linear(H, len(train.vocab)),
        )
    def __call__(self, x):
        return self.final(C.functions.tanh(self.a(x)))

class LinearNetwork(Network): # valid loss 1.74, accuracy 56.0%
    def __init__(self):
        super().__init__(
            final=C.links.Linear(train.x.shape[1], len(train.vocab)),
        )
    def __call__(self, x):
        return self.final(x)
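
# Optional: compare the sizes of the architectures above using nparams().
# (A small sketch; it instantiates one of each network just to count weights.)
for cls in (LinearNetwork, ShallowNetwork, DeepNetwork, MaxoutNetwork, ConvNetwork):
    print("%15s: %d parameters" % (cls.__name__, cls().nparams()))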


batch_size = 512

network = MaxoutNetwork()
opt = C.optimizers.Adam()
opt.use_cleargrads()  # use cleargrads() rather than zerograds() (more efficient)
opt.setup(network)
#opt.add_hook(C.optimizer.WeightDecay(0.001))  # optional L2 regularisation

log = dlt.Log()
for _ in range(4):  # epochs (batches are taken in order, without reshuffling)
    for i in range(0, len(train.x), batch_size):
        network.cleargrads()
        loss, accuracy = network.run(train.x[i:(i + batch_size)], train.y[i:(i + batch_size)])
        loss.backward()
        opt.update()
        log.add('loss', 'train', loss)
        log.add('accuracy', 'train', accuracy)
    # Evaluate on the full validation set once per epoch
    loss, accuracy = network.run(valid.x, valid.y)
    log.add('loss', 'valid', loss)
    log.add('accuracy', 'valid', accuracy)

log.show()
print("Valid: %.3g (%.1f%%) in %.2gs" % (network.evaluate(valid) + (log.elapsed,)))

In [ ]:
# Try the trained classifier interactively: dlt.CustomInput feeds a
# user-supplied image to network.classify
dlt.CustomInput(network.classify)

In [ ]:
print("Valid: %.3g (%.1f%%)" % network.evaluate(valid))
print(" Test: %.3g (%.1f%%)" % network.evaluate(dlt.load_hdf5('/test/uji/test.hdf')))