For testing TensorFlow.


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import tensorflow as tf

In [2]:
# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 18
}

plt.rc('font', **font)
plt.rc('lines', linewidth=2)
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

Fit a line to a toy 2D problem.


In [3]:
import tensorflow as tf
import numpy as np

# Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3

# Try to find values for W and b that compute y_data = W * x_data + b
# (We know that W should be 0.1 and b 0.3, but TensorFlow will
# figure that out for us.)
W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.zeros([1]))
y = W * x_data + b

# Minimize the mean squared errors.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

# Before starting, initialize the variables.  We will 'run' this first.
init = tf.global_variables_initializer()

# Launch the graph.
sess = tf.Session()
sess.run(init)

# Fit the line.
for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(W), sess.run(b))

# Learns best fit is W: [0.1], b: [0.3]


(0, array([ 0.49247518], dtype=float32), array([ 0.13618892], dtype=float32))
(20, array([ 0.20455042], dtype=float32), array([ 0.24888943], dtype=float32))
(40, array([ 0.12865922], dtype=float32), array([ 0.28598964], dtype=float32))
(60, array([ 0.10785604], dtype=float32), array([ 0.29615951], dtype=float32))
(80, array([ 0.10215348], dtype=float32), array([ 0.29894724], dtype=float32))
(100, array([ 0.1005903], dtype=float32), array([ 0.29971144], dtype=float32))
(120, array([ 0.10016182], dtype=float32), array([ 0.29992092], dtype=float32))
(140, array([ 0.10004436], dtype=float32), array([ 0.29997832], dtype=float32))
(160, array([ 0.10001216], dtype=float32), array([ 0.29999408], dtype=float32))
(180, array([ 0.10000335], dtype=float32), array([ 0.29999837], dtype=float32))
(200, array([ 0.10000091], dtype=float32), array([ 0.29999956], dtype=float32))

MNIST


In [4]:
# https://www.tensorflow.org/tutorials/mnist/beginners/
from tensorflow.examples.tutorials.mnist import input_data
# one_hot = one-of-K coding of the labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

In [5]:
# A placeholder requires an instantiation for running the computation.
# None = the dimension can be of any length
x = tf.placeholder(tf.float32, [None, 784])

# A Variable is a modifiable tensor that lives in TensorFlow's graph of interacting operations. 
# tf.zeors(..) will be the initial values.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# b is added to each row?
y = tf.nn.softmax(tf.matmul(x, W) + b)

In [6]:
# Loss function
# True labels
y_ = tf.placeholder(tf.float32, [None, 10])

# sum the second dimension
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1] ))

# optimizer 
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

In [7]:
# initialization operation (not run it yet)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [8]:
# 1000 training steps
for i in range(1000):
    # batch_xs is a numpy array of size 100 x 784
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

In [9]:
# check the predictions
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print sess.run(accuracy, feed_dict={x: mnist.test.images, y_:mnist.test.labels})


0.9176

Multilayer convolutional network


In [ ]:

TensorFlow will sum the derivatives when the function is vector-valued?


In [96]:
b = np.array([1.0, 2, 4])
x = np.array([4.0, 5, 6])

In [97]:
tf_b = tf.constant(b)
tf_x = tf.placeholder(tf.float64)
tf_dot = tf.reduce_sum(tf_x*tf_b)
tf_elem = tf_x*tf_b
tf_outer = tf.matmul(tf.reshape(tf_x, (-1, 1)), tf.reshape(tf_b, (1, -1)) )

tfg_dot = tf.gradients(tf_dot, [tf_x])[0]
tfg_elem = tf.gradients(tf_elem, [tf_x])[0]
tfg_outer = tf.gradients(tf_outer, [tf_x])[0]

In [86]:
with tf.Session() as sess:    
    dot = sess.run(tf_dot, feed_dict={tf_x: x})
    elem = sess.run(tf_elem, feed_dict={tf_x: x})
    outer = sess.run(tf_outer, feed_dict={tf_x: x})
    
    g_dot = sess.run(tfg_dot, feed_dict={tf_x: x})
    g_elem = sess.run(tfg_elem, feed_dict={tf_x: x})
    g_outer = sess.run(tfg_outer, feed_dict={tf_x: x})

In [87]:
print 'dot: {0}'.format(dot)
print 'elem: {0}'.format(elem)
print 'outer: \n{0}'.format(outer)
print
print 'g_dot: {0}'.format(g_dot)
print 'g_elem: {0}'.format(g_elem)
print 'g_outer: {0}'.format(g_outer)


dot: 38.0
elem: [  4.  10.  24.]
outer: 
[[  4.   8.  16.]
 [  5.  10.  20.]
 [  6.  12.  24.]]

g_dot: [ 1.  2.  4.]
g_elem: [ 1.  2.  4.]
g_outer: [ 7.  7.  7.]

The intuition is correct. In mathematics, $\frac{\partial x b^\top}{\partial x_i}$ is a matrix. But, TensorFlow sums all the elements in this matrix and treats the result as the derivative with respect to $x_i$. It then stacks all the derivatives with respect to $x_1, \ldots, x_d$. This is to say that $\frac{\partial }{\partial x}$ will always be of the size of $x$ regardless of the size of the numerator.

Computing the derivative of a vector-valued function


In [156]:
tf_reshape_outer = tf.reshape(tf_outer, (len(x)**2, -1))
tf_L = tf.unstack(tf_reshape_outer)
tfgs = [tf.gradients(elem, [tf_x])[0] for elem in tf_L]

In [157]:
tfgs


Out[157]:
[<tf.Tensor 'gradients_84/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_85/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_86/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_87/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_88/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_89/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_90/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_91/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>,
 <tf.Tensor 'gradients_92/Reshape_18_grad/Reshape:0' shape=<unknown> dtype=float64>]

In [158]:
with tf.Session() as sess:
    print sess.run(tfgs, feed_dict={tf_x: x})


[array([ 1.,  0.,  0.]), array([ 2.,  0.,  0.]), array([ 4.,  0.,  0.]), array([ 0.,  1.,  0.]), array([ 0.,  2.,  0.]), array([ 0.,  4.,  0.]), array([ 0.,  0.,  1.]), array([ 0.,  0.,  2.]), array([ 0.,  0.,  4.])]

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: