Let's do some deep learning on images using a well-known convolutional architecture: two convolution + max-pooling blocks followed by a fully connected layer with dropout and a softmax output, trained on MNIST with TensorFlow.


In [ ]:
# reference https://github.com/aymericdamien/TensorFlow-Examples
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

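As a quick sanity check, the reader returns images flattened to 784 values and one-hot label vectors; with the default split there are 55000 training and 10000 test examples. A minimal inspection cell (not part of the model, shown only to confirm the shapes the rest of the notebook relies on):

In [ ]:
print(mnist.train.images.shape)   # expected (55000, 784) with the default validation split
print(mnist.train.labels.shape)   # expected (55000, 10) because one_hot=True
print(mnist.test.images.shape)    # expected (10000, 784)
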
In [ ]:
BATCH_SIZE = 100
NUM_EPOCHS = 10
NUM_CLASSES = 10
X_DIM = 28
Y_DIM = 28
PIXELS_PER_SAMPLE = X_DIM*Y_DIM
logs_path = '/tmp/mnist_logs/example'
import numpy as np
unique_label = np.unique(np.argmax(mnist.train.labels, 1))
print(unique_label)
assert NUM_CLASSES == len(unique_label), 'number of labels does not match'
assert X_DIM*Y_DIM == mnist.train.images[0].size, 'total pixel count does not match'

In [ ]:
def inference(X, dropout_prob):
    '''
    Build the model graph.
    :param X: placeholder for the input data
    :param dropout_prob: keep probability for dropout on the fully connected layer
    :return: prediction (logits) based on the model
    :rtype: tensor
    '''
    X = tf.reshape(X, shape=[-1, X_DIM, Y_DIM, 1])
    # first convolution layer: 5x5 kernels, 1 input channel, 8 feature maps
    with tf.name_scope("conv1"):
        W = tf.Variable(tf.random_normal([5, 5, 1, 8], stddev=.1), name='weight')
        b = tf.Variable(tf.random_normal([8], stddev=.1), name='bias')
        conv1 = tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')
        conv1 = conv1 + b
        conv1 = tf.nn.relu(conv1)
    # 2x2 max pooling halves the spatial size: 28x28 -> 14x14
    max1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    with tf.name_scope("conv2"):
        W=tf.Variable(tf.random_normal([5, 5, 8, 16], stddev=.1), name='weight')
        b = tf.Variable(tf.random_normal([16], stddev=.1), name= 'bias')
        conv2 = tf.nn.conv2d(max1, W, strides=[1, 1, 1, 1], padding='SAME')
        conv2 = conv2+b
        conv2 = tf.nn.relu(conv2)                      
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    max2= tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 
    # building fully connected layer. Need to reshape output from [Batch, d2, d3, feature_maps]
    # to [Batch, d2*d3*feature_maps]
    # let's figure out the flattened dimension
    dim = 1
    for dim_size in max2.get_shape().as_list()[1:]:
        dim = dim * dim_size
    print(max2.get_shape().as_list()[1:])
    print(dim)
        
    max2 = tf.reshape(max2, shape=[-1, dim])
    with tf.name_scope("fc"):
        W = tf.Variable(tf.random_normal([dim, 64], stddev=.1), name='weight')
        b = tf.Variable(tf.random_normal([64], stddev=.1), name='bias')
        fc1 = tf.matmul(max2, W) + b
        fc1 = tf.nn.relu(fc1)
       
    fc1 = tf.nn.dropout(fc1, dropout_prob)
    with tf.name_scope("output_layer"):
        W = tf.Variable(tf.random_normal([64, NUM_CLASSES], stddev=.1), name='weight')
        b = tf.Variable(tf.random_normal([NUM_CLASSES], stddev=.1), name='bias')
        fc2 = tf.matmul(fc1, W) + b
    return fc2

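With SAME padding the convolutions keep the 28x28 spatial size, and each 2x2/stride-2 max pool halves it, so the feature maps are 14x14 after the first pool and 7x7 after the second. A minimal sketch of that arithmetic (it just mirrors the shape computation inside inference and is not part of the model):

In [ ]:
size = X_DIM                # 28
for _ in range(2):          # two pooling layers, each stride 2 with SAME padding
    size = (size + 1) // 2  # ceil(size / 2)
flattened_dim = size * size * 16   # 16 feature maps after conv2
print(size, flattened_dim)         # expected: 7 784
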
In [ ]:
X = tf.placeholder(tf.float32, [None, PIXELS_PER_SAMPLE])
Y = tf.placeholder(tf.float32, [None, NUM_CLASSES])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# building model
pred = inference(X, keep_prob)
# Define loss and optimizer
with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, Y))
opt = tf.train.GradientDescentOptimizer(learning_rate=.5).minimize(loss)

# Evaluate model
with tf.name_scope('Accuracy'):
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Create a summary to monitor cost tensor
tf.scalar_summary("loss", loss)
# Create a summary to monitor accuracy tensor
tf.scalar_summary("accuracy", accuracy)
# Merge all summaries into a single op
merged_summary_op = tf.merge_all_summaries()

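The loss above is the average softmax cross-entropy between the logits and the one-hot labels. A rough NumPy illustration of what that computes for a single example (for intuition only; the logits and label below are made up, and TensorFlow's op combines the softmax and the log in a numerically stable way):

In [ ]:
import numpy as np
logits = np.array([2.0, 0.5, -1.0])            # made-up logits for a 3-class example
label = np.array([1.0, 0.0, 0.0])              # one-hot target
probs = np.exp(logits) / np.exp(logits).sum()  # softmax
print(-(label * np.log(probs)).sum())          # cross-entropy for this example
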
In [ ]:
# hide all GPUs from TensorFlow so the session runs on CPU only
config = tf.ConfigProto(
        device_count={'GPU': 0}
    )

with tf.Session(config=config) as sess:
    sess.run(tf.initialize_all_variables())
    total_batch = int(mnist.train.num_examples/BATCH_SIZE)
    # op to write logs to Tensorboard
    summary_writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())    
    for eidx in range(NUM_EPOCHS):
        for bidx in range(total_batch):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            xs = xs.astype(np.float32)
            ys = ys.astype(np.float32)
            _, loss_val, summary_val = sess.run([opt, loss, merged_summary_op],
                                                feed_dict={X: xs, Y: ys, keep_prob: .5})
            # Write logs at every iteration
            summary_writer.add_summary(summary_val, eidx * total_batch + bidx)
            if (bidx+1) % 100 == 0:  # print results every 100 batches
                accuracy_val = accuracy.eval(session=sess, feed_dict={X: xs, Y: ys, keep_prob: 1.0})
                print('epoch {} batch {} loss {} accuracy {}'.format(eidx + 1, bidx + 1, loss_val, accuracy_val))
        print('epoch {} test accuracy {}'.format(eidx + 1, accuracy.eval(session=sess,
            feed_dict={X: mnist.test.images.astype(np.float32), Y: mnist.test.labels, keep_prob: 1.0})))
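
To inspect the logged loss and accuracy curves, point TensorBoard at the log directory used above (for example, run tensorboard --logdir=/tmp/mnist_logs/example from a shell) and open the URL it prints in a browser.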