A notebook for testing basic TensorFlow functionality.
In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import tensorflow as tf
In [2]:
# font options
font = {
    # 'family' : 'normal',
    # 'weight' : 'bold',
    'size': 18
}
plt.rc('font', **font)
plt.rc('lines', linewidth=2)
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
In [3]:
import tensorflow as tf
import numpy as np
# Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3
# Try to find values for W and b that compute y_data = W * x_data + b
# (We know that W should be 0.1 and b 0.3, but TensorFlow will
# figure that out for us.)
W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.zeros([1]))
y = W * x_data + b
# Minimize the mean squared errors.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)
# Before starting, initialize the variables. We will 'run' this first.
init = tf.global_variables_initializer()
# Launch the graph.
sess = tf.Session()
sess.run(init)
# Fit the line.
for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(W), sess.run(b))
# Learns that the best fit is W: [0.1], b: [0.3] (cross-checked with NumPy below)
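As a quick sanity check (not part of the original tutorial), a closed-form least-squares fit in NumPy on the same x_data and y_data should recover essentially the same coefficients.
In [ ]:
# NumPy cross-check (an aside, not TensorFlow): fit y = W*x + b in closed form.
# np.polyfit with degree 1 returns [slope, intercept].
W_hat, b_hat = np.polyfit(x_data, y_data, 1)
print(W_hat, b_hat)  # expected to be close to 0.1 and 0.3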
In [4]:
# https://www.tensorflow.org/tutorials/mnist/beginners/
from tensorflow.examples.tutorials.mnist import input_data
# one_hot = one-of-K coding of the labels (illustrated right after this cell)
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
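One-of-K (one-hot) coding turns a class label k into a length-10 vector with a 1 at position k and zeros elsewhere. A minimal NumPy sketch with made-up labels, not the MNIST data:
In [ ]:
# Toy illustration of one-of-K coding.
labels = np.array([2, 0, 9])    # made-up class indices
one_hot = np.eye(10)[labels]    # each row has a single 1 at its label's position
print(one_hot)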
In [5]:
# A placeholder must be fed a concrete value when the computation is run.
# None = the dimension can be of any length
x = tf.placeholder(tf.float32, [None, 784])
# A Variable is a modifiable tensor that lives in TensorFlow's graph of interacting operations.
# tf.zeros(..) provides the initial values.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
# b is broadcast and added to each row (see the NumPy sketch after this cell).
y = tf.nn.softmax(tf.matmul(x, W) + b)
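To answer the question in the comment above: adding a length-10 vector to an (N, 10) matrix broadcasts the vector across every row, which is exactly what + b does in the softmax layer. A small NumPy sketch with toy shapes (not the MNIST model):
In [ ]:
# Broadcasting demo with stand-in arrays.
scores = np.zeros((3, 10))   # stand-in for tf.matmul(x, W) with 3 examples
bias = np.arange(10.0)       # stand-in for b
print(scores + bias)         # bias is added to each of the 3 rows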
In [6]:
# Loss function
# True labels
y_ = tf.placeholder(tf.float32, [None, 10])
# sum -y_ * log(y) over the second dimension (the 10 classes); see the NumPy sketch after this cell
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1] ))
# optimizer
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
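The same loss written out in NumPy, with made-up probabilities purely for illustration: for each example, sum -y_ * log(y) over the 10 classes (the second dimension), then average over the batch.
In [ ]:
# Toy cross-entropy computation mirroring the TensorFlow expression above.
y_true = np.array([[0., 1., 0.], [1., 0., 0.]])         # one-hot labels (3 classes here)
y_pred = np.array([[0.2, 0.7, 0.1], [0.6, 0.3, 0.1]])   # made-up predicted probabilities
per_example = -np.sum(y_true * np.log(y_pred), axis=1)  # sum over the class dimension
print(np.mean(per_example))                             # mean over the batch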
In [7]:
# initialization operation (not run yet)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
In [8]:
# 1000 training steps
for i in range(1000):
    # batch_xs is a NumPy array of size 100 x 784
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
In [9]:
# check the predictions
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
In [96]:
b = np.array([1.0, 2, 4])
x = np.array([4.0, 5, 6])
In [97]:
tf_b = tf.constant(b)
tf_x = tf.placeholder(tf.float64)
tf_dot = tf.reduce_sum(tf_x*tf_b)
tf_elem = tf_x*tf_b
tf_outer = tf.matmul(tf.reshape(tf_x, (-1, 1)), tf.reshape(tf_b, (1, -1)) )
tfg_dot = tf.gradients(tf_dot, [tf_x])[0]
tfg_elem = tf.gradients(tf_elem, [tf_x])[0]
tfg_outer = tf.gradients(tf_outer, [tf_x])[0]
In [86]:
with tf.Session() as sess:
    dot = sess.run(tf_dot, feed_dict={tf_x: x})
    elem = sess.run(tf_elem, feed_dict={tf_x: x})
    outer = sess.run(tf_outer, feed_dict={tf_x: x})
    g_dot = sess.run(tfg_dot, feed_dict={tf_x: x})
    g_elem = sess.run(tfg_elem, feed_dict={tf_x: x})
    g_outer = sess.run(tfg_outer, feed_dict={tf_x: x})
In [87]:
print('dot: {0}'.format(dot))
print('elem: {0}'.format(elem))
print('outer: \n{0}'.format(outer))
print()
print('g_dot: {0}'.format(g_dot))
print('g_elem: {0}'.format(g_elem))
print('g_outer: {0}'.format(g_outer))
The intuition is correct. In mathematics, $\frac{\partial x b^\top}{\partial x_i}$ is a matrix. TensorFlow, however, sums all the elements of this matrix and treats the result as the derivative with respect to $x_i$, then stacks the derivatives with respect to $x_1, \ldots, x_d$. In other words, tf.gradients returns the gradient of the sum of its first argument, so $\frac{\partial}{\partial x}$ always has the shape of $x$, regardless of the shape of the numerator.
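Concretely, for the outer product $x b^\top$ above, every entry of the returned gradient is
$$\frac{\partial}{\partial x_i} \sum_{j,k} x_j b_k = \sum_k b_k = 1 + 2 + 4 = 7,$$
so g_outer should be a length-3 vector whose entries are all 7, while g_dot and g_elem both reduce to $b$ itself.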
In [156]:
# reshape the 3x3 outer product into 9 scalar entries
tf_reshape_outer = tf.reshape(tf_outer, (len(x)**2, -1))
tf_L = tf.unstack(tf_reshape_outer)
# take the gradient of each scalar entry separately; these are rows of the Jacobian (see below)
tfgs = [tf.gradients(elem, [tf_x])[0] for elem in tf_L]
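Each element of the reshaped outer product is the scalar $x_i b_j$, so its gradient with respect to $x$ is a genuine row of the Jacobian,
$$\frac{\partial (x_i b_j)}{\partial x} = b_j \, e_i,$$
i.e. a vector that equals $b_j$ in position $i$ and is zero elsewhere. Stacking the nine gradients in tfgs therefore recovers the full $9 \times 3$ Jacobian rather than the summed version above.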
In [157]:
tfgs
Out[157]:
In [158]:
with tf.Session() as sess:
    print(sess.run(tfgs, feed_dict={tf_x: x}))