import tensorflow as tf
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

# import time
# Notebook-wide interactive session; its graph is later handed to the summary
# writer in the gradient experiment near the end of the file.
sess = tf.InteractiveSession()

Calculating the loss function at a layer other than the top layer

I just want to run a DNN on the MNIST dataset to test my idea.

I chose MNIST because it is neither too easy nor too complex.

# first read data from mnist dataset
def readDataset(filename_queue):
    """Instantiate a line-oriented TensorFlow reader.

    Only the reader object is created here: ``filename_queue`` is not
    consumed and the function returns ``None``.
    """
    line_reader = tf.TextLineReader()

import time
import math
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.examples.tutorials.mnist import mnist

# Directory the MNIST archives are read from.
train_dir = 'dataset/mnist'
# `datasets` bundles several splits of MNIST. Each split is reached as an
# attribute, e.g. `datasets.train` for the training data; the remaining
# splits are accessed the same way.
datasets = input_data.read_data_sets(train_dir=train_dir)

Extracting dataset/mnist/train-images-idx3-ubyte.gz
Extracting dataset/mnist/train-labels-idx1-ubyte.gz
Extracting dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting dataset/mnist/t10k-labels-idx1-ubyte.gz

Construct the DNN

def onelayer(inputs,layername,w_initial_value,b_initial_value,i):
    """Build one fully connected layer computing ``relu(inputs * W + b)``.

    Args:
        inputs: tensor fed into the layer; it is right-multiplied by the
            weight matrix.
        layername: name of the variable scope the layer's variables live in.
        w_initial_value: initial value of the weight variable.
        b_initial_value: initial value of the bias variable.
        i: index of the layer. Not used by the layer itself; kept so that
            existing callers keep working.

    Returns:
        The ReLU activations of the layer.
    """
    with tf.variable_scope(layername):
        # The weight variable is created the same way as the bias variable
        # below: an explicit name plus the caller-supplied initial value.
        weights = tf.Variable(name='weights', initial_value=w_initial_value)
        biases = tf.Variable(name='biases', initial_value=b_initial_value)
        # a = f(W*x + b), where f is ReLU
        outputs = tf.nn.relu(tf.matmul(inputs, weights) + biases)
        return outputs

# inputs contain batchsize * oneImage of data
# inputs = tf.placeholder(dtype='float32',shape=(None,mnist.IMAGE_PIXELS))

def inference(images,fcn):
#     Build the forward pass: a stack of onelayer() blocks followed by a
#     10-way output layer, and return the output of that last layer.
#     images : input tensor; its feature width is taken to be
#              mnist.IMAGE_PIXELS
#     fcn : number of units in each hidden layer, given as a list `[,]`;
#           the input and output layers are not included
#     NOTE(review): both onelayer(...) calls below are cut off -- only the
#     weight-initializer argument survives, so this block does not parse as
#     shown. Restore the remaining arguments before running.

#     width of the tensor entering the next layer
    insize = mnist.IMAGE_PIXELS
    inputs = images
    for i in range(len(fcn)):
        outputs = onelayer(
            tf.truncated_normal([insize, fcn[i]],stddev=1.0 / math.sqrt(float(insize))),
#        the previous layer's outputs are the next layer's inputs
        inputs = outputs
        insize = fcn[i]
#     output layer: one unit per class (10 classes for mnist)
#     NOTE(review): onelayer() ends in a ReLU, so the values returned as
#     logits are non-negative -- confirm this is intended.
    logits = onelayer(
    tf.truncated_normal([insize, 10],stddev=1.0 / math.sqrt(float(insize))),

    return logits

def getloss(logits,labels):
    """Return the mean softmax cross-entropy between logits and labels.

    Args:
        logits: tensor of shape [batch_size, num_classes].
        labels: tensor of shape [batch_size] holding class indices.

    Returns:
        A scalar tensor: the cross-entropy averaged over the batch.
    """
    labels = tf.to_int64(labels)
    # Pass logits/labels by keyword: the positional order of this op differs
    # between TensorFlow releases, and newer releases reject positional use.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    return loss

def training(loss,learning_rate):
    """Create the op that performs one gradient-descent update on ``loss``.

    Args:
        loss: tensor to minimize.
        learning_rate: step size handed to the gradient-descent optimizer.

    Returns:
        The op produced by the optimizer's ``minimize``; it is given a
        non-trainable ``global_step`` variable that starts at 0.
    """
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    # Step counter, excluded from the trainable variables.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    return sgd.minimize(loss, global_step=step_counter)

def evaluation(logits,labels):
    """Count the correctly classified examples in one batch.

    Args:
        logits: per-class scores for each example.
        labels: target class of each example.

    Returns:
        A scalar int32 tensor: the number of examples whose label passes
        the top-1 test (a count of hits, not an error rate).
    """
    # Boolean per example: is the label among the top-1 predictions?
    hits = tf.nn.in_top_k(logits, labels, 1)
    hits_as_int = tf.cast(hits, tf.int32)
    return tf.reduce_sum(hits_as_int)

# def solve(datasets,fcn):
#     #     init
#     logits = inference(inputs)
#     init = tf.initialize_all_variables()

# create placeholder for images and labels at the number of batch_size
def placeholder_inputs(batch_size):
    """Create the placeholders that one batch of images and labels is fed into.

    Args:
        batch_size: leading dimension of both placeholders.

    Returns:
        A tuple ``(images_placeholder, labels_placeholder)``: a float32
        placeholder of shape ``(batch_size, mnist.IMAGE_PIXELS)`` and an
        int32 placeholder of shape ``(batch_size,)``.
    """
    images_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, mnist.IMAGE_PIXELS))
    # `(batch_size)` is just the bare value, not a tuple. Spelling out the
    # one-element tuple keeps the labels rank-1 even if batch_size is None.
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder

# then create method for feed placeholder
def fill_feed_dict(dataset,images_pl,labels_pl,batch_size):
    """Fetch the next batch and map it onto the given placeholders.

    Args:
        dataset: the split to draw from (train or test); must provide
            ``next_batch(batch_size)`` returning ``(images, labels)``.
        images_pl: placeholder that receives the images.
        labels_pl: placeholder that receives the labels.
        batch_size: number of examples requested from ``dataset``.

    Returns:
        A dict mapping ``images_pl`` and ``labels_pl`` to the batch data.
    """
    images_feed, labels_feed = dataset.next_batch(batch_size)
    feed_dict = {
        images_pl: images_feed,
        labels_pl: labels_feed,
    }
    return feed_dict

def do_eval(sess,
#     Run one full pass of evaluation over `data_set` and print how many
#     examples were counted as correct.
#     NOTE(review): the parameter list above, the fill_feed_dict(...) call,
#     the `true_count +=` statement and the final print(...) are all cut
#     off, so this block does not parse as shown. Restore the missing parts
#     before running.
    true_count = 0
#     only whole batches are evaluated; a trailing partial batch is dropped
    steps_per_epoch = data_set.num_examples // batch_size
    num_examples = steps_per_epoch * batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set,
        true_count +=,feed_dict=feed_dict)
#     NOTE(review): under Python 2 this is integer division when both
#     operands are ints -- confirm the interpreter version.
    precision = true_count / num_examples
    print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' %

# Directory the MNIST archives are read from (read by run_training).
train_dir = 'dataset/mnist'
# Directory used by run_training for the summary writer and checkpoint path.
summary_dir = 'dataset/mnist/summary'
def run_training(batch_size,fcn,learning_rate,iters):
    # Build the MNIST graph (placeholders, network, loss, train op, eval op)
    # and run `iters` training steps, printing the loss every 100 steps.
    #   batch_size    - presumably the number of examples fed per step; the
    #                   calls that would use it are cut off below
    #   fcn           - hidden layer sizes, forwarded to inference()
    #   learning_rate - forwarded to training()
    #   iters         - number of iterations of the training loop
    # NOTE(review): several statements in this function are truncated (the
    # placeholder_inputs / fill_feed_dict calls, the session-run lines and
    # the saver call), so the block does not parse as shown.
    #
    # `datasets` bundles the MNIST splits; datasets.train is the training
    # split used in the loop below.
    datasets = input_data.read_data_sets(train_dir=train_dir)
    with tf.Graph().as_default():
#         generate placeholders for images and labels
        images_placeholder,labels_placeholder = placeholder_inputs(
#         build the network part of the Graph
        logits = inference(images_placeholder,fcn)
#         add the loss op
        loss = getloss(logits,labels_placeholder)
#         tf.scalar_summary('loss',loss)
#         add to the Graph the ops that calculate and apply gradients
        train_op = training(loss,learning_rate)
#         add the op that compares the logits to the labels during evaluation
        eval_correct = evaluation(logits,labels_placeholder)
#         merge all summaries registered so far into one op
#         NOTE(review): the only summary call visible above is commented
#         out -- confirm that `summary` is usable.
        summary = tf.merge_all_summaries()
#         op that initializes all variables
        init = tf.initialize_all_variables()
#         create a saver for writing training checkpoints
        saver = tf.train.Saver()
#         create a session for running ops on the Graph
        sess = tf.Session()
#         instantiate a SummaryWriter to output summaries and the Graph
        summary_writer = tf.train.SummaryWriter(summary_dir,sess.graph)
#         NOTE(review): the statement that actually runs `init` is missing
#         here; variables must be initialized before the loop.
#         start the training loop
        for step in range(iters):
            start_time = time.time()
#             feed the next batch of training data to the placeholders
            feed_dict = fill_feed_dict(datasets.train,
#             run one training step and fetch the loss value
            _, loss_value =[train_op,loss],
#             wall-clock time spent on this step
            duration = time.time() - start_time
#             report progress every 100 steps
            if(step%100 ==0):
                print('Step %d: loss = %.2f (%.3f sec)' % (step,loss_value,duration))
#                 update the events file
                summary_str =,
#                 save a checkpoint and evaluate the model periodically
#                 NOTE(review): as indented, this test sits inside the
#                 `step % 100 == 0` branch, where `(step + 1) % 1000 == 0`
#                 can never hold; it probably belongs one level out.
                if(step + 1)% 1000 == 0 or (step + 1)==iters:
#                     path the checkpoint is written to
#                     NOTE(review): `os` is not imported in the visible part
#                     of this file.
                    checkpoint_file = os.path.join(summary_dir,'checkpoint')
          ,checkpoint_file,global_step = step)
#                     evaluate against the validation set
                    print('Validation Data Eval:')

# Hyper-parameters for the experiment. The names match run_training's
# parameters; the call itself is not visible here -- presumably it followed.
# fcn: number of units in each hidden layer, in order.
fcn = [12,24,24,32,32,32,32,128,128]
# step size for gradient descent
learning_rate = 0.01
# number of training steps
iters = 10000

Step 0: loss = 2.30 (0.022 sec)
Step 100: loss = 2.30 (0.002 sec)
Step 200: loss = 2.30 (0.002 sec)
Step 300: loss = 2.30 (0.005 sec)
Step 400: loss = 2.30 (0.003 sec)
Step 500: loss = 2.30 (0.002 sec)
Step 600: loss = 2.30 (0.002 sec)
Step 700: loss = 2.30 (0.007 sec)
Step 800: loss = 2.29 (0.007 sec)
Step 900: loss = 2.29 (0.002 sec)
Step 1000: loss = 2.28 (0.002 sec)
Step 1100: loss = 2.26 (0.051 sec)
Step 1200: loss = 2.24 (0.002 sec)
Step 1300: loss = 2.15 (0.003 sec)
Step 1400: loss = 2.15 (0.004 sec)
Step 1500: loss = 2.09 (0.004 sec)
Step 1600: loss = 2.03 (0.002 sec)
Step 1700: loss = 2.00 (0.002 sec)
Step 1800: loss = 2.00 (0.003 sec)
Step 1900: loss = 1.99 (0.002 sec)
Step 2000: loss = 1.97 (0.003 sec)
Step 2100: loss = 1.87 (0.002 sec)
Step 2200: loss = 1.82 (0.050 sec)
Step 2300: loss = 1.71 (0.002 sec)
Step 2400: loss = 1.66 (0.002 sec)
Step 2500: loss = 1.67 (0.003 sec)
Step 2600: loss = 1.43 (0.002 sec)
Step 2700: loss = 1.76 (0.004 sec)
Step 2800: loss = 1.29 (0.002 sec)
Step 2900: loss = 1.27 (0.003 sec)
Step 3000: loss = 1.14 (0.002 sec)
Step 3100: loss = 0.88 (0.005 sec)
Step 3200: loss = 0.95 (0.002 sec)
Step 3300: loss = 1.03 (0.065 sec)
Step 3400: loss = 0.76 (0.003 sec)
Step 3500: loss = 0.83 (0.003 sec)
Step 3600: loss = 0.86 (0.002 sec)
Step 3700: loss = 0.77 (0.004 sec)
Step 3800: loss = 0.96 (0.002 sec)
Step 3900: loss = 0.58 (0.004 sec)
Step 4000: loss = 0.77 (0.002 sec)
Step 4100: loss = 0.69 (0.003 sec)
Step 4200: loss = 0.89 (0.003 sec)
Step 4300: loss = 0.77 (0.002 sec)
Step 4400: loss = 0.68 (0.069 sec)
Step 4500: loss = 0.72 (0.003 sec)
Step 4600: loss = 0.70 (0.002 sec)
Step 4700: loss = 0.97 (0.004 sec)
Step 4800: loss = 0.67 (0.002 sec)
Step 4900: loss = 0.80 (0.002 sec)
Step 5000: loss = 0.55 (0.003 sec)
Step 5100: loss = 0.69 (0.002 sec)
Step 5200: loss = 0.48 (0.002 sec)
Step 5300: loss = 0.51 (0.003 sec)
Step 5400: loss = 0.53 (0.002 sec)
Step 5500: loss = 0.42 (0.057 sec)
Step 5600: loss = 0.52 (0.002 sec)
Step 5700: loss = 0.59 (0.002 sec)
Step 5800: loss = 0.66 (0.002 sec)
Step 5900: loss = 0.49 (0.002 sec)
Step 6000: loss = 0.62 (0.002 sec)
Step 6100: loss = 0.43 (0.002 sec)
Step 6200: loss = 0.61 (0.002 sec)
Step 6300: loss = 0.61 (0.002 sec)
Step 6400: loss = 0.63 (0.002 sec)
Step 6500: loss = 0.51 (0.002 sec)
Step 6600: loss = 0.64 (0.049 sec)
Step 6700: loss = 0.44 (0.002 sec)
Step 6800: loss = 0.47 (0.002 sec)
Step 6900: loss = 0.45 (0.002 sec)
Step 7000: loss = 0.37 (0.002 sec)
Step 7100: loss = 0.56 (0.002 sec)
Step 7200: loss = 0.60 (0.002 sec)
Step 7300: loss = 0.39 (0.002 sec)
Step 7400: loss = 0.45 (0.002 sec)
Step 7500: loss = 0.54 (0.002 sec)
Step 7600: loss = 0.63 (0.002 sec)
Step 7700: loss = 0.53 (0.051 sec)
Step 7800: loss = 0.41 (0.016 sec)
Step 7900: loss = 0.48 (0.002 sec)
Step 8000: loss = 0.44 (0.002 sec)
Step 8100: loss = 0.48 (0.002 sec)
Step 8200: loss = 0.35 (0.002 sec)
Step 8300: loss = 0.73 (0.002 sec)
Step 8400: loss = 0.45 (0.002 sec)
Step 8500: loss = 0.51 (0.003 sec)
Step 8600: loss = 0.27 (0.002 sec)
Step 8700: loss = 0.50 (0.002 sec)
Step 8800: loss = 0.26 (0.066 sec)
Step 8900: loss = 0.59 (0.002 sec)
Step 9000: loss = 0.40 (0.002 sec)
Step 9100: loss = 0.51 (0.002 sec)
Step 9200: loss = 0.51 (0.002 sec)
Step 9300: loss = 0.43 (0.002 sec)
Step 9400: loss = 0.33 (0.002 sec)
Step 9500: loss = 0.30 (0.002 sec)
Step 9600: loss = 0.48 (0.002 sec)
Step 9700: loss = 0.72 (0.003 sec)
Step 9800: loss = 0.43 (0.002 sec)
Step 9900: loss = 0.34 (0.058 sec)

array([ 0.,  0.])

Another exercise: getting the gradients of the weights and biases

# A single linear layer, output = x * w + b, used to inspect gradients.
# NOTE(review): the cross-entropy call, the feed_dict literal and the
# `loss_value =` statement below are cut off, so this cell does not parse
# as shown.
in_size= 10 
out_size=  10
x = tf.placeholder(tf.float32,shape=[None,in_size])
w = tf.Variable(tf.truncated_normal([in_size,out_size],stddev=1e-5),name='weights')
b = tf.Variable(tf.zeros([out_size]))
output = tf.matmul(x, w) + b

# calculate the loss between the labels and output
labels = tf.placeholder(tf.int32,shape=[None])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
loss = tf.reduce_mean(cross_entropy,name='xentropy_mean')

# gradients of `output` with respect to w and b
# NOTE(review): these differentiate `output`, not `loss` -- confirm that is
# the quantity of interest.
gradient_w = tf.gradients(output,w)
gradient_b = tf.gradients(output,b)

# add summary of gradient of w and b

summary_dir = 'summary'
summary = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(summary_dir,sess.graph)
# op that initializes all variables (must be run in the session before use)
init = tf.initialize_all_variables()

# feed the data
# random input of shape [batch, in_size] = [5, 10]
data = np.random.random([5,10])
feed_dict = {
loss_value =,

# get summary and display
# summary_str =,
#                        feed_dict={x:data})
# record one item

In [37]:,feed_dict={x:data})

[array([ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.], dtype=float32)]

