In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
## TF 1.x input pipeline (this tutorial module was removed in TF 2.x)
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
tf.set_random_seed(0)


Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz
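
A quick sanity check on what was just loaded (the shapes in the comments follow from one_hot=True, reshape=False and validation_size=0 in the call above):

In [ ]:
## Dataset shapes as loaded above
print(mnist.train.images.shape)  # (60000, 28, 28, 1): reshape=False keeps 2D images
print(mnist.train.labels.shape)  # (60000, 10): one-hot labels
print(mnist.test.images.shape)   # (10000, 28, 28, 1)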

In [12]:
## Initializing all placeholders
X = tf.placeholder(tf.float32, [None, 28, 28, 1])  # Placeholder for images
Y_ = tf.placeholder(tf.float32, [None, 10])        # Placeholder for actual labels
lr = tf.placeholder(tf.float32)                    # Placeholder for the learning rate
## No dropout in this first model
## Number of output channels in each convolutional layer
K = 4
L = 8
M = 12

### Initializing weights
## For the convolutional layers
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))
B1 = tf.Variable(tf.ones([K])/10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.ones([L])/10)
W3 = tf.Variable(tf.truncated_normal([5, 5, L, M], stddev=0.1))
B3 = tf.Variable(tf.ones([M])/10)

## For the fully connected layers
W4 = tf.Variable(tf.truncated_normal([7*7*M, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)
W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)

## Convolutional model calculation
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding="SAME") + B1)
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding="SAME") + B2)
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding="SAME") + B3)
YF = tf.reshape(Y3, [-1, 7*7*M])
Y4 = tf.nn.relu(tf.matmul(YF, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
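## Shape check (batch dimension omitted): the input is 28x28x1; the
## stride-1 conv keeps 28x28 with K channels, and each stride-2 conv with
## SAME padding halves the spatial size, giving 14x14xL then 7x7xM, which
## is why the flatten above uses 7*7*M.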

## Parameters for learning rate decay
max_learning_rate = 0.003
min_learning_rate = 0.0001
decay_speed = 2000.0
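## The schedule used in the training loop below is
##   lr(i) = min_lr + (max_lr - min_lr) * exp(-i / decay_speed)
## so lr(0) = 0.003, lr(2000) ~ 0.0012 (since exp(-1) ~ 0.37),
## decaying towards 0.0001.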

## Accuracy calculation
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## Optimizer function
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100  ## Scaled by the batch size (100), so the printed loss is a per-batch sum

## Training step
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
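
The layer sizes above determine the parameter count; a minimal sketch that sums over the variables already in the default graph:

In [ ]:
## Total number of trainable parameters in the model defined above
total = sum(int(np.prod(v.get_shape().as_list())) for v in tf.trainable_variables())
print(total)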

In [13]:
## Training phase
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  ## To initialize all variables

for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(100)
    Learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
    train_data = {X: batch_X, Y_: batch_Y, lr: Learning_rate}
    sess.run(train_step, feed_dict = train_data)
    
    ## Accuracy on the training batch, every 2000 iterations
    if(i%2000 == 0):
        a, c = sess.run([accuracy, cross_entropy], {X: batch_X, Y_: batch_Y})
        print(a, c)
    
    
## Accuracy on test set
test_data = {X: mnist.test.images, Y_: mnist.test.labels}
a, c = sess.run([accuracy, cross_entropy], test_data)
print(a, c)


0.16 225.259
0.99 5.64314
1.0 0.226078
1.0 0.124225
1.0 0.0130643
0.9891 5.90268
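
Before enlarging the network, a quick sketch of inspecting a single prediction from the session above (index 0 is arbitrary):

In [ ]:
## Predicted vs. actual digit for the first test image
pred = sess.run(tf.argmax(Y, 1), {X: mnist.test.images[:1]})
print(pred[0], np.argmax(mnist.test.labels[0]))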

In [3]:
#### A bigger convolutional network (more degrees of freedom), with dropout
#### in the fully connected layer
## Initializing all placeholders
X = tf.placeholder(tf.float32, [None, 28, 28, 1])  # Placeholder for images
Y_ = tf.placeholder(tf.float32, [None, 10])        # Placeholder for actual labels
lr = tf.placeholder(tf.float32)                    # Placeholder for the learning rate
pkeep = tf.placeholder(tf.float32)                 # Dropout keep-probability
## Number of output channels in each convolutional layer
K = 6
L = 12
M = 24

### Initializing weights
## For the convolutional layers
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))
B1 = tf.Variable(tf.ones([K])/10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.ones([L])/10)
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
B3 = tf.Variable(tf.ones([M])/10)

## For the fully connected layers
W4 = tf.Variable(tf.truncated_normal([7*7*M, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)
W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)

## Convolutional model calculation
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding="SAME") + B1)
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding="SAME") + B2)
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding="SAME") + B3)
YF = tf.reshape(Y3, [-1, 7*7*M])
Y4 = tf.nn.relu(tf.matmul(YF, W4) + B4)
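## tf.nn.dropout keeps each activation with probability pkeep and scales
## the kept values by 1/pkeep, so feeding pkeep=1.0 at test time needs no
## extra rescaling.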
Y4d = tf.nn.dropout(Y4, pkeep)
Ylogits = tf.matmul(Y4d, W5) + B5
Y = tf.nn.softmax(Ylogits)

## Parameters for learning rate decay
max_learning_rate = 0.003
min_learning_rate = 0.0001
decay_speed = 2000.0

## Accuracy calculation
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## Optimizer function
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100  ## Scaled by the batch size (100), so the printed loss is a per-batch sum

## Training step
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

In [4]:
## Training phase
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  ## To initialize all variables

for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(100)
    Learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
    train_data = {X: batch_X, Y_: batch_Y, pkeep: 0.75, lr: Learning_rate}
    sess.run(train_step, feed_dict = train_data)
    
    ## Accuracy on the training batch, every 2000 iterations
    if(i%2000 == 0):
        a, c = sess.run([accuracy, cross_entropy], {X: batch_X, Y_: batch_Y, pkeep: 1.0})
        print(a, c)
    
    
## Accuracy on test set
test_data = {X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0}
a, c = sess.run([accuracy, cross_entropy], test_data)
print(a, c)


0.12 240.916
1.0 0.809248
1.0 0.244806
1.0 0.31861
1.0 0.0279735
0.9919 3.21379
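
To reuse the trained weights later, one option is a tf.train.Saver checkpoint (the file path here is illustrative):

In [ ]:
## Save all variables of the current session to a checkpoint file
saver = tf.train.Saver()
save_path = saver.save(sess, "./mnist_conv_dropout.ckpt")
print(save_path)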