In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
In [2]:
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
tf.set_random_seed(0)
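## A quick sanity check on the loaded data (a hedged sketch: the expected
## shapes assume reshape=False and validation_size=0 as requested above).
print("train images: %s" % (mnist.train.images.shape,))  # expected (60000, 28, 28, 1)
print("test labels:  %s" % (mnist.test.labels.shape,))   # expected (10000, 10) with one_hot=True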
In [12]:
## Placeholders for the inputs, labels, and learning rate
X = tf.placeholder(tf.float32, [None, 28, 28, 1]) # Placeholder for images
Y_ = tf.placeholder(tf.float32, [None, 10]) # Placeholder for actual labels
lr = tf.placeholder(tf.float32)
## No dropout in this first model
## Number of output channels in each convolutional layer
K = 4
L = 8
M = 12
### Initializing weights
## For convolutional layers
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))  # 5x5 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.ones([K])/10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.ones([L])/10)
W3 = tf.Variable(tf.truncated_normal([5, 5, L, M], stddev=0.1))
B3 = tf.Variable(tf.ones([M])/10)
## For fully connected layers
W4 = tf.Variable(tf.truncated_normal([7*7*M, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)
W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)
## Convolutional model calculation
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding="SAME") + B1)   # output 28x28xK
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding="SAME") + B2)  # stride 2: output 14x14xL
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding="SAME") + B3)  # stride 2: output 7x7xM
YF = tf.reshape(Y3, [-1, 7*7*M])  # flatten for the fully connected layers
Y4 = tf.nn.relu(tf.matmul(YF, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
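## Hedged sanity check: static shape inference should agree with the 7*7*M
## flatten size used above ("?" is the unknown batch dimension).
print(Y3.get_shape())       # expected (?, 7, 7, 12)
print(Ylogits.get_shape())  # expected (?, 10)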
## Parameters for learning rate decay
max_learning_rate = 0.003
min_learning_rate = 0.0001
decay_speed = 2000.0
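## A small illustration (not part of the model): the schedule used during
## training below decays exponentially from max_learning_rate towards
## min_learning_rate with time constant decay_speed.
for step in (0, 2000, 10000):
    lr_i = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-step / decay_speed)
    print("lr(%d) = %.6f" % (step, lr_i))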
## Accuracy calculation
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
## Loss function (computed from the raw logits for numerical stability)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100  ## * 100 rescales the batch mean to a per-batch total (batch size is 100)
## Training step
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
In [13]:
## Training phase
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  ## To initialize all variables
for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(100)
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)
    train_data = {X: batch_X, Y_: batch_Y, lr: learning_rate}
    sess.run(train_step, feed_dict=train_data)
    ## Accuracy on the training batch, every 2000 iterations
    if i % 2000 == 0:
        a, c = sess.run([accuracy, cross_entropy], {X: batch_X, Y_: batch_Y})
        print("train: accuracy %g, cross-entropy %g" % (a, c))
        ## Accuracy on the test set
        test_data = {X: mnist.test.images, Y_: mnist.test.labels}
        a, c = sess.run([accuracy, cross_entropy], test_data)
        print("test:  accuracy %g, cross-entropy %g" % (a, c))
In [3]:
#### A bigger convolutional network (more degrees of freedom) with dropout
#### in the fully connected layer
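## Note (an optional step, not in the original): this cell redefines X, Y_, etc.
## in the same default graph, so the nodes from the first model stay around.
## Calling tf.reset_default_graph() here first would start from a clean graph.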
## Placeholders for the inputs, labels, learning rate, and dropout
X = tf.placeholder(tf.float32, [None, 28, 28, 1]) # Placeholder for images
Y_ = tf.placeholder(tf.float32, [None, 10]) # Placeholder for actual labels
lr = tf.placeholder(tf.float32)
pkeep = tf.placeholder(tf.float32)  ## Dropout keep-probability, fed at run time
## Number of output channels in each convolutional layer
K = 6
L = 12
M = 24
### Initializing weights
## For convolutional layers
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.ones([K])/10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))  # 5x5 patch
B2 = tf.Variable(tf.ones([L])/10)
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))  # 4x4 patch
B3 = tf.Variable(tf.ones([M])/10)
## For fully connected layers
W4 = tf.Variable(tf.truncated_normal([7*7*M, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)
W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)
## Convolutional model calculation
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding="SAME") + B1)   # output 28x28xK
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, 2, 2, 1], padding="SAME") + B2)  # stride 2: output 14x14xL
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, 2, 2, 1], padding="SAME") + B3)  # stride 2: output 7x7xM
YF = tf.reshape(Y3, [-1, 7*7*M])  # flatten for the fully connected layers
Y4 = tf.nn.relu(tf.matmul(YF, W4) + B4)
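## tf.nn.dropout keeps each activation with probability pkeep and scales the
## kept values by 1/pkeep, so no extra rescaling is needed at test time (pkeep = 1.0).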
Y4d = tf.nn.dropout(Y4, pkeep)
Ylogits = tf.matmul(Y4d, W5) + B5
Y = tf.nn.softmax(Ylogits)
## Parameters for learning rate decay
max_learning_rate = 0.003
min_learning_rate = 0.0001
decay_speed = 2000.0
## Accuracy calculation
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
## Loss function (computed from the raw logits for numerical stability)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100  ## * 100 rescales the batch mean to a per-batch total (batch size is 100)
## Training step
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
In [4]:
## Training phase
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  ## To initialize all variables
for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(100)
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)
    train_data = {X: batch_X, Y_: batch_Y, pkeep: 0.75, lr: learning_rate}
    sess.run(train_step, feed_dict=train_data)
    ## Accuracy on the training batch, every 2000 iterations (dropout disabled: pkeep = 1.0)
    if i % 2000 == 0:
        a, c = sess.run([accuracy, cross_entropy], {X: batch_X, Y_: batch_Y, pkeep: 1.0})
        print("train: accuracy %g, cross-entropy %g" % (a, c))
        ## Accuracy on the test set
        test_data = {X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0}
        a, c = sess.run([accuracy, cross_entropy], test_data)
        print("test:  accuracy %g, cross-entropy %g" % (a, c))