In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import functools
import time


/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [2]:
mnist = input_data.read_data_sets('', one_hot = True)


Extracting train-images-idx3-ubyte.gz
Extracting train-labels-idx1-ubyte.gz
Extracting t10k-images-idx3-ubyte.gz
Extracting t10k-labels-idx1-ubyte.gz

In [3]:
def residual_block(x, i, filters, kernel_size):
    x_copy = x
    pad_len = (kernel_size - 1) * i
    x = tf.pad(x, [[0, 0], [pad_len, 0], [0, 0]])
    x = tf.layers.conv1d(x, filters, kernel_size, dilation_rate=i,padding='valid')
    tanh = tf.nn.tanh(x)
    sigmoid = tf.nn.sigmoid(x)
    x = tanh * sigmoid
    x = tf.layers.dropout(x, 0.05, noise_shape=[x.shape[0], x.shape[1], tf.constant(1)])
    x = tf.layers.conv1d(x,filters,1,padding='same')
    return x_copy + x, x

class Model:
    def __init__(self, filters=32,kernel_size=4,dilations=[1,2,4,8],
                stacks=8):
        self.X = tf.placeholder(tf.float32, [None, 28, 28])
        self.Y = tf.placeholder(tf.float32, [None, 10])
        padded_x = tf.pad(self.X, [[0, 0], [(filters - 1), 0], [0, 0]])
        padded_x = tf.layers.conv1d(padded_x, filters, kernel_size, dilation_rate=1)
        for s in range(stacks):
            for i in dilations:
                padded_x, skip_out = residual_block(padded_x, i, filters, kernel_size)
        self.logits = tf.layers.dense(padded_x[:,-1], 10)
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = 0.002).minimize(self.cost)
        self.correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

In [4]:
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

In [ ]:
EPOCH = 10
BATCH_SIZE = 128

In [ ]:
for i in range(EPOCH):
    last = time.time()
    TOTAL_LOSS, ACCURACY = 0, 0
    for n in range(0, (mnist.train.images.shape[0] // BATCH_SIZE) * BATCH_SIZE, BATCH_SIZE):
        batch_x = mnist.train.images[n: n + BATCH_SIZE, :].reshape((-1, 28, 28))
        acc, cost, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                           feed_dict = {model.X : batch_x, 
                                         model.Y : mnist.train.labels[n: n + BATCH_SIZE, :]})
        ACCURACY += acc
        TOTAL_LOSS += cost
    TOTAL_LOSS /= (mnist.train.images.shape[0] // BATCH_SIZE)
    ACCURACY /= (mnist.train.images.shape[0] // BATCH_SIZE)
    print('epoch %d, avg loss %f, avg acc %f, time taken %f secs'%(i+1,TOTAL_LOSS,ACCURACY,time.time()-last))


epoch 1, avg loss 0.276650, avg acc 0.918925, time taken 32.548837 secs
epoch 2, avg loss 0.106526, avg acc 0.966892, time taken 30.491681 secs
epoch 3, avg loss 0.068610, avg acc 0.977892, time taken 30.623620 secs
epoch 4, avg loss 0.056535, avg acc 0.981807, time taken 30.625988 secs
epoch 5, avg loss 0.050189, avg acc 0.983610, time taken 30.637084 secs