In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

In [4]:
# import time
tf.reset_default_graph()
sess = tf.InteractiveSession()

calculation loss function not in top layer

I just want to run dnn in mnist dataset to test my idea.

I choose mnist due to its not too easy and not too complex.


In [79]:
# first read data from mnist dataset
def readDataset(filename_queue):
    tf.train.string_input_producer(filename_queue)
    reader = tf.TextLineReader()
    return

In [80]:
import time
import math
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.examples.tutorials.mnist import mnist

In [81]:
train_dir = 'dataset/mnist'
# the datasets include traindata and testdata,
# so get traindata by using datasets.train for example.
# And get other dataset like before .
datasets = input_data.read_data_sets(train_dir=train_dir)


Extracting dataset/mnist/train-images-idx3-ubyte.gz
Extracting dataset/mnist/train-labels-idx1-ubyte.gz
Extracting dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting dataset/mnist/t10k-labels-idx1-ubyte.gz

construct dnn


In [82]:
def onelayer(inputs,layername,w_initial_value,b_initial_value,i):
    with tf.variable_scope(layername):
#         a = f(W*x+b) f refers to ReLU 
#         W = tf.Variable(name='weights',initial_value=w_initial_value,dtype=tf.float32)
        weights = tf.Variable(
        w_initial_value,
        name='weights')
#     record weights
        tf.histogram_summary('weights'+str(i),weights)
        
        biases = tf.Variable(name='biases',initial_value=b_initial_value)
#         record biases
        tf.histogram_summary('biases'+str(i),biases)
        
        outputs = tf.nn.relu(tf.matmul(inputs,weights)+biases)
        
        return outputs

In [83]:
# inputs contain batchsize * oneImage of data
# inputs = tf.placeholder(dtype='float32',shape=(None,mnist.IMAGE_PIXELS))

def inference(images,fcn):
#     fcn : num of units in each layers using `[,]` format except input and output layer 
#     outputs=None

    insize = mnist.IMAGE_PIXELS
    inputs = images
    
    for i in range(len(fcn)):
        
        outputs = onelayer(
            inputs,'fcn'+str(i),
            tf.truncated_normal([insize, fcn[i]],stddev=1.0 / math.sqrt(float(insize))),
            tf.zeros(fcn[i]),i)
        tf.histogram_summary('fcn'+str(i)+'/activations',outputs)
#        last layer's outputs is the next layer's inputs
        inputs = outputs
        insize = fcn[i]
#     get logit(probability of each class)
# num of classes is 10 for mnist
    logits = onelayer(
    inputs,'fcn'+str(i+1),
    tf.truncated_normal([insize, 10],stddev=1.0 / math.sqrt(float(insize))),
    tf.zeros(10),i+1)

    return logits

In [84]:
def getloss(logits,labels):
#     softmax loss function
# logits is [batch_size,num_classes] tensor and labels is [batch_size]
    labels = tf.to_int64(labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits, labels, name='xentropy')
    loss = tf.reduce_mean(cross_entropy,name='xentropy_mean')
    return loss

In [85]:
def training(loss,learning_rate):

    
#     add summary
#     tf.summary.scalar('loss', loss)
    tf.scalar_summary('loss',loss)
#     create optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#     global_step 
    global_step = tf.Variable(0,name='global_step',trainable=False)
#     minimize the loss 
    train_op = optimizer.minimize(loss,global_step = global_step)
    
    return train_op

In [86]:
def evaluation(logits,labels):
#     correct is a bool tensor
# return a scalar int32 tensor
    correct = tf.nn.in_top_k(logits,labels,1)
#     get error rate in one batch
    return tf.reduce_sum(tf.cast(correct,tf.int32))

In [87]:
# def solve(datasets,fcn):
#     #     init
#     logits = inference(inputs)
    
#     init = tf.initialize_all_variables()
#     sess.run(init)

In [88]:
# create placeholder for images and labels at the number of batch_size
def placeholder_inputs(batch_size):
#     couvert dataset to images and labels
    images_placeholder = tf.placeholder(tf.float32,shape=(batch_size,mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32,shape=(batch_size))
    
    return images_placeholder,labels_placeholder

In [89]:
# then create method for feed placeholder
def fill_feed_dict(dataset,images_pl,labels_pl,batch_size):
#     dataset refers to train or test dataset

# feed bacth_size numbers of images and labels 
    images_feed,labels_feed = dataset.next_batch(batch_size)
    
    feed_dict = {
        images_pl:images_feed,
        labels_pl:labels_feed,
    }
    
    return feed_dict

In [90]:
def do_eval(sess,
           eval_correct,
           images_placeholder,
           labels_placeholder,
           data_set,batch_size):
#     run test in test_data
    true_count = 0
    steps_per_epoch = data_set.num_examples // batch_size
    num_examples = steps_per_epoch * batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set,
                                  images_placeholder,
                                  labels_placeholder)
        true_count += sess.run(eval_correct,feed_dict=feed_dict)
    precision = true_count / num_examples
    print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
         (num_examples,true_count,precision))

In [93]:
train_dir = 'dataset/mnist'
summary_dir = 'dataset/mnist/summary'
def run_training(batch_size,fcn,learning_rate,iters):
    
    # the datasets include traindata and testdata,
    # so get traindata by using datasets.train for example.
    # And get other dataset like before .
    datasets = input_data.read_data_sets(train_dir=train_dir)
    with tf.Graph().as_default():
#         generate placeholders for images and labels
        images_placeholder,labels_placeholder = placeholder_inputs(
        batch_size)
        
#       build a Graph
        logits = inference(images_placeholder,fcn)
        
#         add loss option
        loss = getloss(logits,labels_placeholder)
        
#         tf.scalar_summary('loss',loss)
#         Add to the Graph the Ops that calculate and apply gradients
        train_op = training(loss,learning_rate)
        
#         Add the Op to compare the logits to the labels during evaluation
        eval_correct = evaluation(logits,labels_placeholder)
        
#         build the summary 
        summary = tf.merge_all_summaries()
        
#         initialize all variable
        init = tf.initialize_all_variables()
        
#         Create a saver for writing training checkpoints
        saver = tf.train.Saver()
        
#         Create a session for running Ops on the Graph
        sess = tf.Session()
        
#         Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(summary_dir,sess.graph)
        
#         run the Op to initialize the variables.
        sess.run(init)
    
#         Start the training loop.
        for step in range(iters):
            start_time = time.time()
            
#             feed data to placeholders(train_data)
            feed_dict = fill_feed_dict(datasets.train,
                                       images_placeholder,
                                       labels_placeholder,
                                      batch_size)
#             get the return value (tensor from the Graph)
            _, loss_value = sess.run([train_op,loss],
                                    feed_dict=feed_dict)
            
#             get the duration
            duration = time.time() - start_time
            
#         Add the summary often
            if(step%100 ==0):
                print('Step %d: loss = %.2f (%.3f sec)' % (step,loss_value,duration))
                
#                 update the events file.
                summary_str = sess.run(summary,
                                       feed_dict=feed_dict)
                summary_writer.add_summary(summary_str,step)
                summary_writer.flush()
                
#                 Save a checkpoint and evaluate the model periodically
                if(step + 1)% 1000 == 0 or (step + 1)==iters:
#                     set dir to save
                    checkpoint_file = os.path.join(summary_dir,'checkpoint')
                    saver.save(sess,checkpoint_file,global_step = step)
                    
#                     evaluate against the training set.
                    print('Validation Data Eval:')
                    do_eval(sess,
                           eval_correct,
                           images_placeholder,
                           labels_placeholder,
                           datasets.validation)

In [94]:
fcn = [12,24,24,32,32,32,32,128,128]
learning_rate = 0.01
iters = 10000
run_training(batch_size=100,fcn=fcn,learning_rate=learning_rate,iters=iters)


Extracting dataset/mnist/train-images-idx3-ubyte.gz
Extracting dataset/mnist/train-labels-idx1-ubyte.gz
Extracting dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting dataset/mnist/t10k-labels-idx1-ubyte.gz
Step 0: loss = 2.30 (0.022 sec)
Step 100: loss = 2.30 (0.002 sec)
Step 200: loss = 2.30 (0.002 sec)
Step 300: loss = 2.30 (0.005 sec)
Step 400: loss = 2.30 (0.003 sec)
Step 500: loss = 2.30 (0.002 sec)
Step 600: loss = 2.30 (0.002 sec)
Step 700: loss = 2.30 (0.007 sec)
Step 800: loss = 2.29 (0.007 sec)
Step 900: loss = 2.29 (0.002 sec)
Step 1000: loss = 2.28 (0.002 sec)
Step 1100: loss = 2.26 (0.051 sec)
Step 1200: loss = 2.24 (0.002 sec)
Step 1300: loss = 2.15 (0.003 sec)
Step 1400: loss = 2.15 (0.004 sec)
Step 1500: loss = 2.09 (0.004 sec)
Step 1600: loss = 2.03 (0.002 sec)
Step 1700: loss = 2.00 (0.002 sec)
Step 1800: loss = 2.00 (0.003 sec)
Step 1900: loss = 1.99 (0.002 sec)
Step 2000: loss = 1.97 (0.003 sec)
Step 2100: loss = 1.87 (0.002 sec)
Step 2200: loss = 1.82 (0.050 sec)
Step 2300: loss = 1.71 (0.002 sec)
Step 2400: loss = 1.66 (0.002 sec)
Step 2500: loss = 1.67 (0.003 sec)
Step 2600: loss = 1.43 (0.002 sec)
Step 2700: loss = 1.76 (0.004 sec)
Step 2800: loss = 1.29 (0.002 sec)
Step 2900: loss = 1.27 (0.003 sec)
Step 3000: loss = 1.14 (0.002 sec)
Step 3100: loss = 0.88 (0.005 sec)
Step 3200: loss = 0.95 (0.002 sec)
Step 3300: loss = 1.03 (0.065 sec)
Step 3400: loss = 0.76 (0.003 sec)
Step 3500: loss = 0.83 (0.003 sec)
Step 3600: loss = 0.86 (0.002 sec)
Step 3700: loss = 0.77 (0.004 sec)
Step 3800: loss = 0.96 (0.002 sec)
Step 3900: loss = 0.58 (0.004 sec)
Step 4000: loss = 0.77 (0.002 sec)
Step 4100: loss = 0.69 (0.003 sec)
Step 4200: loss = 0.89 (0.003 sec)
Step 4300: loss = 0.77 (0.002 sec)
Step 4400: loss = 0.68 (0.069 sec)
Step 4500: loss = 0.72 (0.003 sec)
Step 4600: loss = 0.70 (0.002 sec)
Step 4700: loss = 0.97 (0.004 sec)
Step 4800: loss = 0.67 (0.002 sec)
Step 4900: loss = 0.80 (0.002 sec)
Step 5000: loss = 0.55 (0.003 sec)
Step 5100: loss = 0.69 (0.002 sec)
Step 5200: loss = 0.48 (0.002 sec)
Step 5300: loss = 0.51 (0.003 sec)
Step 5400: loss = 0.53 (0.002 sec)
Step 5500: loss = 0.42 (0.057 sec)
Step 5600: loss = 0.52 (0.002 sec)
Step 5700: loss = 0.59 (0.002 sec)
Step 5800: loss = 0.66 (0.002 sec)
Step 5900: loss = 0.49 (0.002 sec)
Step 6000: loss = 0.62 (0.002 sec)
Step 6100: loss = 0.43 (0.002 sec)
Step 6200: loss = 0.61 (0.002 sec)
Step 6300: loss = 0.61 (0.002 sec)
Step 6400: loss = 0.63 (0.002 sec)
Step 6500: loss = 0.51 (0.002 sec)
Step 6600: loss = 0.64 (0.049 sec)
Step 6700: loss = 0.44 (0.002 sec)
Step 6800: loss = 0.47 (0.002 sec)
Step 6900: loss = 0.45 (0.002 sec)
Step 7000: loss = 0.37 (0.002 sec)
Step 7100: loss = 0.56 (0.002 sec)
Step 7200: loss = 0.60 (0.002 sec)
Step 7300: loss = 0.39 (0.002 sec)
Step 7400: loss = 0.45 (0.002 sec)
Step 7500: loss = 0.54 (0.002 sec)
Step 7600: loss = 0.63 (0.002 sec)
Step 7700: loss = 0.53 (0.051 sec)
Step 7800: loss = 0.41 (0.016 sec)
Step 7900: loss = 0.48 (0.002 sec)
Step 8000: loss = 0.44 (0.002 sec)
Step 8100: loss = 0.48 (0.002 sec)
Step 8200: loss = 0.35 (0.002 sec)
Step 8300: loss = 0.73 (0.002 sec)
Step 8400: loss = 0.45 (0.002 sec)
Step 8500: loss = 0.51 (0.003 sec)
Step 8600: loss = 0.27 (0.002 sec)
Step 8700: loss = 0.50 (0.002 sec)
Step 8800: loss = 0.26 (0.066 sec)
Step 8900: loss = 0.59 (0.002 sec)
Step 9000: loss = 0.40 (0.002 sec)
Step 9100: loss = 0.51 (0.002 sec)
Step 9200: loss = 0.51 (0.002 sec)
Step 9300: loss = 0.43 (0.002 sec)
Step 9400: loss = 0.33 (0.002 sec)
Step 9500: loss = 0.30 (0.002 sec)
Step 9600: loss = 0.48 (0.002 sec)
Step 9700: loss = 0.72 (0.003 sec)
Step 9800: loss = 0.43 (0.002 sec)
Step 9900: loss = 0.34 (0.058 sec)

In [21]:
np.zeros(2)


Out[21]:
array([ 0.,  0.])

In [22]:
tf.contrib.layers.fully_connected?

another practice for getting gradients of weights and biases


In [44]:
in_size= 10 
out_size=  10
x = tf.placeholder(tf.float32,shape=[None,in_size])
w = tf.Variable(tf.truncated_normal([in_size,out_size],stddev=1e-5),name='weights')
b = tf.Variable(tf.zeros([out_size]))
output = tf.matmul(x, w) + b

# calculate the loss between y and output
labels = tf.placeholder(tf.int32,shape=[None])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
output,labels,name='xentropy')
loss = tf.reduce_mean(cross_entropy,name='xentropy_mean')

In [45]:
# calculate the gradient the 
gradient_w = tf.gradients(output,w)
gradient_b = tf.gradients(output,b)

In [47]:
# add summary of gradient of w and b
tf.histogram_summary('gradient_w',gradient_w)
tf.histogram_summary('gradient_b',gradient_b)
tf.scalar_summary('loss',loss)

summary_dir = 'summary'
summary = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(summary_dir,sess.graph)
# run the session 
init = tf.initialize_all_variables()
sess.run(init)

# feed the data
# [batch * in_size]
data = np.random.random([5,10])
feed_dict = {
    x:data,
    labels:np.random.randint([5]),
}
loss_value = sess.run(loss,
         feed_dict=feed_dict)

# get summary and display
# summary_str = sess.run(summary,
#                        feed_dict={x:data})
# record one item
summary_writer.add_summary(summary_str,1)
summary_writer.flush()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-47-b664196c940a> in <module>()
     16 feed_dict = {
     17     x:data,
---> 18     labels:np.random.randint([5])}
     19 loss_value = sess.run(loss,
     20          feed_dict=feed_dict)

mtrand.pyx in mtrand.RandomState.randint (numpy/random/mtrand/mtrand.c:14458)()

ValueError: high is out of bounds for int64

In [37]:
sess.run(gradient_b,feed_dict={x:data})


Out[37]:
[array([ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.], dtype=float32)]

In [ ]: