In [31]:
import tensorflow as tf
node1 = tf.constant(3.0, dtype=tf.float32)
node2 = tf.constant(4.0) # dtype tf.float32 is the default
print(node1, node2)
In TensorFlow Core, the nodes of the computational graph are operations or values (constants, variables): they take zero or more tensors as input and produce tensors as output.
Creating a node does not compute anything; nodes are only evaluated when explicitly run inside a Session.
In [17]:
sess = tf.Session()
print(sess.run([node1, node2]))
We can perform operations like so:
In [4]:
node3 = tf.add(node1, node2)
print(sess.run(node3))
Instead of a single kind of graph input, TensorFlow breaks things into two types (a quick contrast follows the list):
placeholder
- a promise to provide a value later. Use it to feed training data into the graph.
variable
- must be initialized with some value. Use it for trainable parameters like the weights W.
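As a quick contrast (the names p and v here are just illustrative, and variable initialization is covered properly a few cells down):
In [ ]:
p = tf.placeholder(tf.float32)          # gets a concrete value at run time via feed_dict
v = tf.Variable(2.0, dtype=tf.float32)  # holds state inside the graph; needs initializing
scaled = p * v
sess.run(tf.global_variables_initializer())  # give v its starting value
print(sess.run(scaled, {p: 5.0}))            # 10.0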
In [5]:
# creating the tiniest graph
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
adder_node = tf.add(a,b)
print(sess.run(adder_node, feed_dict = {a:3, b:4.5}))
print(sess.run(adder_node, {a:3, b:4.5}))
print(sess.run(adder_node, {a:[2,1], b:[4,3]})) # arrays are tensors too
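Placeholders can also be declared with an explicit shape so TensorFlow can catch mismatched feeds early; a small sketch (the shape [None, 3] is an arbitrary choice here, with None leaving the number of rows flexible):
In [ ]:
m = tf.placeholder(tf.float32, shape=[None, 3])  # any number of rows, exactly 3 columns
row_sums = tf.reduce_sum(m, axis=1)
print(sess.run(row_sums, {m: [[1, 2, 3], [4, 5, 6]]}))  # [ 6. 15.]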
Going back to our linear model Wx + b, we would use a variable for W and b, and a placeholder for the features x.
In [28]:
# build the model graph (initialization happens below)
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + b
# global_variables_initializer() creates an op that initializes all the variables in this graph;
# like any node, it does nothing until it is run inside a session
init = tf.global_variables_initializer()
sess.run(init)
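Once init has run, the variables hold their starting values, which you can read back with sess.run:
In [ ]:
print(sess.run([W, b]))  # the initial values of W and b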
In [25]:
# run the computational graph
print(sess.run(linear_model, {x:[1,2,3,4]}))
You can reassign the value of a variable,
which you'll need to do every time you update any parameters.
In [30]:
# tf.assign returns a tensor carrying the new value once the assignment runs
updateW = tf.assign(W, [1.0])
sess.run(updateW)  # W is now [1.0]
print(sess.run(linear_model, {x: [1, 2, 3, 4]}))
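In practice you rarely call tf.assign by hand during training; TensorFlow's optimizers compute the gradients and apply the parameter updates for you. A minimal sketch fitting this same linear model with gradient descent (the learning rate 0.01, the toy data, and the 1000 steps are all arbitrary choices):
In [ ]:
y = tf.placeholder(tf.float32)
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of squared errors
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
sess.run(tf.global_variables_initializer())  # resets W and b to their starting values
for _ in range(1000):
    sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]})
print(sess.run([W, b]))  # heads toward W = -1, b = 1 for this data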
In [ ]:
import numpy as np
np.random.seed(0)
# data dimensions N*D
N, D = 3, 4
# input features
x = np.random.randn(N, D)
y = np.random.randn(N, D)
z = np.random.randn(N, D)
# computational nodes
a = x * y
b = a + z
c = np.sum(b)
# all the gradients, derived by hand (reverse-mode backprop)
grad_c = 1.0                       # dc/dc
grad_b = grad_c * np.ones((N, D))  # c = sum(b), so dc/db = 1 everywhere
grad_a = grad_b.copy()             # b = a + z, so dc/da = dc/db
grad_z = grad_b.copy()             # ...and dc/dz = dc/db
grad_x = grad_a * y                # a = x * y, so dc/dx = dc/da * y
grad_y = grad_a * x                # ...and dc/dy = dc/da * x
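A hand-derived gradient like this is easy to sanity-check with a finite-difference estimate; here we nudge a single entry of x (eps = 1e-6 is just an arbitrary small step):
In [ ]:
eps = 1e-6
x_plus = x.copy()
x_plus[0, 0] += eps                      # perturb one input entry
c_plus = np.sum(x_plus * y + z)          # recompute c with the perturbed input
print((c_plus - c) / eps, grad_x[0, 0])  # the two numbers should agree closely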
In [14]:
import numpy as np
import tensorflow as tf
np.random.seed(0)
# data dimensions N*D
N, D = 3, 4
# placeholders for data feeds
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
z = tf.placeholder(tf.float32)
# nodes
a = x * y
b = a + z
c = tf.reduce_sum(b)
# gradients, so much easier!
grad_x, grad_y, grad_z = tf.gradients(c, [x,y,z])
# run the computational graph
with tf.Session() as sess:
    # creating dummy training data
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
        z: np.random.randn(N, D),
    }
    # running the graph!
    out = sess.run([c, grad_x, grad_y, grad_z], feed_dict=values)
    c_val, grad_x_val, grad_y_val, grad_z_val = out
    print("c node value", c_val)
    print("x gradient value", grad_x_val)
    print("y gradient value", grad_y_val)
    print("z gradient value", grad_z_val)