In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# import time
tf.reset_default_graph()
sess = tf.InteractiveSession()
In [79]:
# first attempt at reading data from files via an input queue
# (left as a sketch; the input_data helper below is used instead)
def readDataset(filenames):
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    return key, value
In [80]:
import os
import time
import math
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.examples.tutorials.mnist import mnist
In [81]:
train_dir = 'dataset/mnist'
# read_data_sets returns train, validation and test splits;
# access them as datasets.train, datasets.validation and datasets.test
datasets = input_data.read_data_sets(train_dir=train_dir)
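A quick sanity check of what read_data_sets returns (the split sizes noted below are the usual MNIST defaults, shown only for orientation):
In [ ]:
# the returned object holds three DataSet splits
print(datasets.train.num_examples)       # usually 55000
print(datasets.validation.num_examples)  # usually 5000
print(datasets.test.num_examples)        # usually 10000
# next_batch yields (images, labels): images are [batch, 784] floats, labels are [batch] class ids
batch_images, batch_labels = datasets.train.next_batch(8)
print(batch_images.shape, batch_labels.shape)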
Construct the DNN
In [82]:
def onelayer(inputs, layername, w_initial_value, b_initial_value, i):
    with tf.variable_scope(layername):
        # a = f(W*x + b), where f is ReLU
        weights = tf.Variable(w_initial_value, name='weights')
        # record weights
        tf.histogram_summary('weights' + str(i), weights)
        biases = tf.Variable(b_initial_value, name='biases')
        # record biases
        tf.histogram_summary('biases' + str(i), biases)
        outputs = tf.nn.relu(tf.matmul(inputs, weights) + biases)
        return outputs
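A minimal usage sketch of onelayer in a throwaway graph (the scope name 'demo_fcn0' and the 32-unit size are made up for illustration):
In [ ]:
# one hidden layer mapping the 784 input pixels to 32 ReLU units
with tf.Graph().as_default():
    demo_x = tf.placeholder(tf.float32, shape=(None, mnist.IMAGE_PIXELS))
    demo_h = onelayer(demo_x,
                      'demo_fcn0',
                      tf.truncated_normal([mnist.IMAGE_PIXELS, 32],
                                          stddev=1.0 / math.sqrt(float(mnist.IMAGE_PIXELS))),
                      tf.zeros(32), 0)
    print(demo_h.get_shape())  # (?, 32)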
In [83]:
# inputs contain batch_size * one image of data
# inputs = tf.placeholder(dtype='float32', shape=(None, mnist.IMAGE_PIXELS))
def inference(images, fcn):
    # fcn: list of hidden-layer sizes, e.g. [128, 64]; the input and output layers are not included
    insize = mnist.IMAGE_PIXELS
    inputs = images
    for i in range(len(fcn)):
        outputs = onelayer(
            inputs, 'fcn' + str(i),
            tf.truncated_normal([insize, fcn[i]], stddev=1.0 / math.sqrt(float(insize))),
            tf.zeros(fcn[i]), i)
        tf.histogram_summary('fcn' + str(i) + '/activations', outputs)
        # this layer's outputs are the next layer's inputs
        inputs = outputs
        insize = fcn[i]
    # final layer produces the logits (10 classes for MNIST);
    # note that onelayer applies ReLU, so these logits are non-negative
    logits = onelayer(
        inputs, 'fcn' + str(i + 1),
        tf.truncated_normal([insize, 10], stddev=1.0 / math.sqrt(float(insize))),
        tf.zeros(10), i + 1)
    return logits
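A shape check of the stacked network with a small, made-up fcn list:
In [ ]:
# two hidden layers (16 and 8 units) followed by the 10-way output layer
with tf.Graph().as_default():
    demo_images = tf.placeholder(tf.float32, shape=(None, mnist.IMAGE_PIXELS))
    demo_logits = inference(demo_images, [16, 8])
    print(demo_logits.get_shape())  # (?, 10)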
In [84]:
def getloss(logits, labels):
    # softmax cross-entropy loss
    # logits is a [batch_size, num_classes] tensor and labels is [batch_size]
    labels = tf.to_int64(labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, labels, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    return loss
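For intuition, the per-example loss is -log(softmax(logits)[label]); a small NumPy illustration with made-up numbers:
In [ ]:
# hand-computed cross entropy for a single 3-class example
demo_logits = np.array([2.0, 1.0, 0.1])
demo_label = 0
demo_probs = np.exp(demo_logits) / np.sum(np.exp(demo_logits))
print(-np.log(demo_probs[demo_label]))  # ~0.417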
In [85]:
def training(loss, learning_rate):
    # add a summary for the loss (tf.summary.scalar in newer TF versions)
    tf.scalar_summary('loss', loss)
    # create the gradient descent optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # global_step counts how many training steps have been run
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # minimize the loss (also increments global_step)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
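A throwaway-graph sketch showing that minimize(..., global_step=global_step) bumps the counter once per training step (the toy loss here is arbitrary):
In [ ]:
# global_step increments once per run of train_op
with tf.Graph().as_default():
    v = tf.Variable(5.0)
    demo_loss = tf.square(v)
    demo_train_op = training(demo_loss, learning_rate=0.1)
    with tf.Session() as demo_sess:
        demo_sess.run(tf.initialize_all_variables())
        demo_sess.run(demo_train_op)
        demo_sess.run(demo_train_op)
        step_var = [var for var in tf.all_variables() if 'global_step' in var.name][0]
        print(demo_sess.run(step_var))  # 2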
In [86]:
def evaluation(logits, labels):
    # correct is a bool tensor of shape [batch_size]
    correct = tf.nn.in_top_k(logits, labels, 1)
    # return the number of correct predictions in the batch (a scalar int32 tensor)
    return tf.reduce_sum(tf.cast(correct, tf.int32))
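A tiny check of the evaluation op with hand-written logits (both rows predict the true class, so the count is 2):
In [ ]:
# in_top_k with k=1 marks rows whose arg-max equals the label
with tf.Graph().as_default(), tf.Session() as demo_sess:
    demo_logits = tf.constant([[0.1, 0.9], [0.8, 0.2]])
    demo_labels = tf.constant([1, 0])
    print(demo_sess.run(evaluation(demo_logits, demo_labels)))  # 2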
In [87]:
# def solve(datasets,fcn):
# # init
# logits = inference(inputs)
# init = tf.initialize_all_variables()
# sess.run(init)
In [88]:
# create placeholders for one batch of images and labels
def placeholder_inputs(batch_size):
    # the dataset is fed in as separate image and label tensors
    images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))
    return images_placeholder, labels_placeholder
In [89]:
# helper that builds the feed_dict for the placeholders
def fill_feed_dict(dataset, images_pl, labels_pl, batch_size):
    # dataset refers to the train, validation or test split
    # feed batch_size images and labels
    images_feed, labels_feed = dataset.next_batch(batch_size)
    feed_dict = {
        images_pl: images_feed,
        labels_pl: labels_feed,
    }
    return feed_dict
In [90]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set, batch_size):
    # run one epoch of evaluation over data_set
    true_count = 0
    steps_per_epoch = data_set.num_examples // batch_size
    num_examples = steps_per_epoch * batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set,
                                   images_placeholder,
                                   labels_placeholder,
                                   batch_size)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
In [93]:
train_dir = 'dataset/mnist'
summary_dir = 'dataset/mnist/summary'
def run_training(batch_size, fcn, learning_rate, iters):
    # read_data_sets returns train, validation and test splits
    datasets = input_data.read_data_sets(train_dir=train_dir)
    with tf.Graph().as_default():
        # generate placeholders for images and labels
        images_placeholder, labels_placeholder = placeholder_inputs(batch_size)
        # build the forward graph
        logits = inference(images_placeholder, fcn)
        # add the loss op
        loss = getloss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients
        train_op = training(loss, learning_rate)
        # Add the Op to compare the logits to the labels during evaluation
        eval_correct = evaluation(logits, labels_placeholder)
        # merge all summaries
        summary = tf.merge_all_summaries()
        # op to initialize all variables
        init = tf.initialize_all_variables()
        # Create a saver for writing training checkpoints
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph
        sess = tf.Session()
        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(summary_dir, sess.graph)
        # run the Op to initialize the variables.
        sess.run(init)
        # Start the training loop.
        for step in range(iters):
            start_time = time.time()
            # feed a batch of training data to the placeholders
            feed_dict = fill_feed_dict(datasets.train,
                                       images_placeholder,
                                       labels_placeholder,
                                       batch_size)
            # run one training step and fetch the loss
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict=feed_dict)
            duration = time.time() - start_time
            # write the summaries periodically
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                # update the events file
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            # Save a checkpoint and evaluate the model periodically
            if (step + 1) % 1000 == 0 or (step + 1) == iters:
                checkpoint_file = os.path.join(summary_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # evaluate against the validation set
                print('Validation Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        datasets.validation,
                        batch_size)
In [94]:
fcn = [12,24,24,32,32,32,32,128,128]
learning_rate = 0.01
iters = 10000
run_training(batch_size=100,fcn=fcn,learning_rate=learning_rate,iters=iters)
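Once training has written checkpoints, the latest one can be restored in a fresh graph; a sketch, assuming the graph is rebuilt with the same fcn (the batch size of 100 here is arbitrary):
In [ ]:
# restore the most recent checkpoint saved by run_training above
with tf.Graph().as_default():
    images_pl, labels_pl = placeholder_inputs(100)
    restored_logits = inference(images_pl, fcn)
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(summary_dir)
    if ckpt and ckpt.model_checkpoint_path:
        with tf.Session() as demo_sess:
            saver.restore(demo_sess, ckpt.model_checkpoint_path)
            print('restored from %s' % ckpt.model_checkpoint_path)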
In [21]:
np.zeros(2)
Out[21]:
In [22]:
tf.contrib.layers.fully_connected?
In [44]:
in_size= 10
out_size= 10
x = tf.placeholder(tf.float32,shape=[None,in_size])
w = tf.Variable(tf.truncated_normal([in_size,out_size],stddev=1e-5),name='weights')
b = tf.Variable(tf.zeros([out_size]))
output = tf.matmul(x, w) + b
# calculate the loss between labels and output
labels = tf.placeholder(tf.int32,shape=[None])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
output,labels,name='xentropy')
loss = tf.reduce_mean(cross_entropy,name='xentropy_mean')
In [45]:
# calculate the gradients of output with respect to w and b
gradient_w = tf.gradients(output,w)
gradient_b = tf.gradients(output,b)
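Note that tf.gradients returns a Python list (one tensor per variable passed in); since output = x*w + b, the gradient of sum(output) with respect to b is just the batch size in every component. A quick check with a made-up 3-row input:
In [ ]:
# tf.gradients returns a list; d(sum(output))/db equals the batch size per output unit
sess.run(tf.initialize_all_variables())
demo_data = np.random.random([3, in_size]).astype(np.float32)
print(len(gradient_b))                                    # 1
print(sess.run(gradient_b[0], feed_dict={x: demo_data}))  # ten 3.0 values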
In [47]:
# add summaries for the gradients of w and b (tf.gradients returns lists, hence [0])
tf.histogram_summary('gradient_w', gradient_w[0])
tf.histogram_summary('gradient_b', gradient_b[0])
tf.scalar_summary('loss', loss)
summary_dir = 'summary'
summary = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(summary_dir, sess.graph)
# run the session
init = tf.initialize_all_variables()
sess.run(init)
# feed the data: [batch, in_size] images and [batch] integer labels
data = np.random.random([5, 10])
feed_dict = {
    x: data,
    labels: np.random.randint(0, out_size, size=5),
}
loss_value = sess.run(loss,
                      feed_dict=feed_dict)
# evaluate the merged summaries and record one item
summary_str = sess.run(summary,
                       feed_dict=feed_dict)
summary_writer.add_summary(summary_str, 1)
summary_writer.flush()
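The events file written under summary/ can then be inspected with TensorBoard, e.g. by running tensorboard --logdir=summary and opening the reported URL.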
In [37]:
sess.run(gradient_b,feed_dict={x:data})
Out[37]:
In [ ]: