Chapter 9 – Up and running with TensorFlow
This notebook contains all the sample code and solutions to the exercises in chapter 9.
First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:
In [10]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
# Common imports
import numpy as np
import os
import tensorflow as tf
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "tensorflow"
def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
In [32]:
beetles_full = pd.read_csv('beetleTrainingData.csv')
# beetles = beetles_full.drop(['accuracy_num', 'accuracy_txt'], axis=1).as_matrix()
beetles_test_train = beetles_full[beetles_full.columns[~beetles_full.columns.str.contains('_RA')]].drop(['accuracy_num', 'accuracy_txt'], axis=1).as_matrix()
# beetles_target = beetles_full['accuracy'].as_matrix()
beetles_target_test_train = beetles_full.accuracy_txt.apply(lambda a: int(a == 'correct')).as_matrix()
# beetles_target = beetles_full.accuracy_num.as_matrix()
pca = PCA(n_components = 50)
beetles_pca_test_train = pca.fit_transform(beetles_test_train)
beetles_pca, beetles_test, beetles_target, beetles_target_test = train_test_split(
    beetles_pca_test_train, beetles_target_test_train, test_size=0.33, random_state=42)
m, n = beetles_pca.shape
beetles_plus_bias = np.c_[np.ones((m, 1)), beetles_pca]
tm, tn = beetles_test.shape
beetles_test_plus_bias = np.c_[np.ones((tm, 1)), beetles_test]
X = beetles_plus_bias
np.linalg.inv(X.T.dot(X))
Out[32]:
In [25]:
pca.components_
Out[25]:
In [26]:
reset_graph()
X = tf.constant(beetles_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
with tf.Session() as sess:
    theta_value = theta.eval()
In [27]:
theta_value
Out[27]:
In [28]:
# y_predict = X.dot(theta_best)
y_predict = tf.reduce_sum(tf.multiply(tf.transpose(theta_value), X), axis=1)  # row-wise dot products, i.e. X.dot(theta)
y_predict
Out[28]:
In [29]:
with tf.Session() as sess:
    print(y_predict.eval())
In [34]:
# y_predict = X.dot(theta_best)
y_predict = tf.reduce_sum(tf.multiply(tf.transpose(theta_value), beetles_test_plus_bias), axis=1)  # row-wise dot products
with tf.Session() as sess:
    print(y_predict.eval())
y_predict
Out[34]:
Compare with pure NumPy
In [5]:
X = beetles_plus_bias
y = beetles_target.reshape(-1, 1)
theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
print(theta_numpy)
Compare with Scikit-Learn
In [6]:
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(beetles_pca, beetles_target.reshape(-1, 1))
print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])
Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now.
In [35]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_beetles_data = scaler.fit_transform(beetles_pca)
scaled_beetles_data_plus_bias = np.c_[np.ones((m, 1)), scaled_beetles_data]
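The text above mentions that the scaling could also be done with TensorFlow itself. Here is a minimal sketch of what that could look like (the tf.nn.moments-based approach and the variable names are illustrative, not part of the notebook):
data = tf.constant(beetles_pca, dtype=tf.float32)
mean, variance = tf.nn.moments(data, axes=[0])    # per-feature mean and variance
scaled_data = (data - mean) / tf.sqrt(variance)   # z-score each column, like StandardScaler
with tf.Session() as sess:
    scaled_beetles_data_tf = scaled_data.eval()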
In [36]:
print(scaled_beetles_data_plus_bias.mean(axis=0))
print(scaled_beetles_data_plus_bias.mean(axis=1))
print(scaled_beetles_data_plus_bias.mean())
print(scaled_beetles_data_plus_bias.shape)
In [37]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_beetles_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
In [10]:
best_theta
Out[10]:
Same as above except for the gradients = ... line:
In [38]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_beetles_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
In [39]:
gradients = tf.gradients(mse, [theta])[0]
In [40]:
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
In [ ]:
How could you find the partial derivatives of the following function with regards to a and b?
In [14]:
def my_func(a, b):
    z = 0
    for i in range(100):
        z = a * np.cos(z + i) + z * np.sin(b - i)
    return z
In [15]:
my_func(0.2, 0.3)
Out[15]:
In [16]:
reset_graph()
a = tf.Variable(0.2, name="a")
b = tf.Variable(0.3, name="b")
z = tf.constant(0.0, name="z0")
for i in range(100):
    z = a * tf.cos(z + i) + z * tf.sin(b - i)
grads = tf.gradients(z, [a, b])
init = tf.global_variables_initializer()
Let's compute the function at $a=0.2$ and $b=0.3$, and the partial derivatives at that point with regards to $a$ and with regards to $b$:
In [17]:
with tf.Session() as sess:
    init.run()
    print(z.eval())
    print(sess.run(grads))
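As a quick sanity check (not part of the original notebook), the same partial derivatives can be approximated numerically with central finite differences on my_func; the step size eps is arbitrary:
eps = 1e-6
df_da = (my_func(0.2 + eps, 0.3) - my_func(0.2 - eps, 0.3)) / (2 * eps)
df_db = (my_func(0.2, 0.3 + eps) - my_func(0.2, 0.3 - eps)) / (2 * eps)
print(df_da, df_db)   # should be close to the values returned by tf.gradients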
In [18]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_beetles_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
In [19]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
In [20]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
In [21]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_beetles_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
In [22]:
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=0.9)
In [23]:
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
In [24]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
In [25]:
reset_graph()
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
print(B_val_1)
In [26]:
print(B_val_2)
In [27]:
n_epochs = 1000
learning_rate = 0.01
In [28]:
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
In [29]:
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
In [30]:
n_epochs = 10
In [31]:
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
In [32]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)     # not shown in the book
    indices = np.random.randint(m, size=batch_size)     # not shown
    X_batch = scaled_beetles_data_plus_bias[indices]    # not shown
    y_batch = beetles_target.reshape(-1, 1)[indices]    # not shown
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()
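Note that fetch_batch() draws indices with replacement, so within an epoch some rows may be sampled several times and others not at all. A sketch of an alternative that shuffles the training set once per epoch and slices it into disjoint batches (the shuffle_batch helper is hypothetical, not part of the notebook):
def shuffle_batch(X_data, y_data, batch_size, seed=None):
    # yield disjoint mini-batches that together cover the whole training set once
    rnd = np.random.RandomState(seed)
    indices = rnd.permutation(len(X_data))
    for start in range(0, len(X_data), batch_size):
        batch_idx = indices[start:start + batch_size]
        yield X_data[batch_idx], y_data[batch_idx]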
In [33]:
best_theta
Out[33]:
In [34]:
reset_graph()
n_epochs = 1000 # not shown in the book
learning_rate = 0.01 # not shown
X = tf.constant(scaled_beetles_data_plus_bias, dtype=tf.float32, name="X") # not shown
y = tf.constant(beetles_target.reshape(-1, 1), dtype=tf.float32, name="y") # not shown
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions") # not shown
error = y_pred - y # not shown
mse = tf.reduce_mean(tf.square(error), name="mse") # not shown
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) # not shown
training_op = optimizer.minimize(mse) # not shown
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())            # not shown
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)

    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")
In [35]:
best_theta
Out[35]:
In [36]:
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()  # not shown in the book
In [37]:
np.allclose(best_theta, best_theta_restored)
Out[37]:
If you want to have a saver that loads and restores theta with a different name, such as "weights":
In [38]:
saver = tf.train.Saver({"weights": theta})
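For example (a hedged sketch, not in the notebook; the checkpoint path is made up), saving with this saver stores theta under the key "weights", and restoring it later requires a saver built with the same mapping:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "/tmp/my_renamed_model.ckpt")     # theta is stored under "weights"

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_renamed_model.ckpt")  # works because the mapping matches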
By default the saver also saves the graph structure itself in a second file with the extension .meta. You can use the function tf.train.import_meta_graph() to restore the graph structure. This function loads the graph into the default graph and returns a Saver that can then be used to restore the graph's state (i.e., the variable values):
In [39]:
reset_graph()
# notice that we start with an empty graph.
saver = tf.train.import_meta_graph("/tmp/my_model_final.ckpt.meta") # this loads the graph structure
theta = tf.get_default_graph().get_tensor_by_name("theta:0") # not shown in the book
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")  # this restores the graph's state
    best_theta_restored = theta.eval()  # not shown in the book
In [40]:
np.allclose(best_theta, best_theta_restored)
Out[40]:
This means that you can import a pretrained model without having to have the corresponding Python code to build the graph. This is very handy when you keep tweaking and saving your model: you can load a previously saved model without having to search for the version of the code that built it.
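If you don't remember the tensor names in a saved graph, one option (a small sketch using the standard graph API, not shown in the book) is to list the operations right after importing the meta graph:
for op in tf.get_default_graph().get_operations():
    print(op.name)    # e.g. "theta", "predictions", "mse", ...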
In [41]:
from IPython.display import clear_output, Image, display, HTML
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = b"<stripped %d bytes>" % size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
In [42]:
show_graph(tf.get_default_graph())
In [43]:
reset_graph()
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
In [44]:
n_epochs = 1000
learning_rate = 0.01
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
In [45]:
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
In [46]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
In [47]:
with tf.Session() as sess:                                            # not shown in the book
    sess.run(init)                                                    # not shown
    for epoch in range(n_epochs):                                     # not shown
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()                                         # not shown
In [48]:
file_writer.close()
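To actually view the logged MSE curve and the graph, start TensorBoard from a terminal and point it at the log directory (assuming tf_logs was created next to this notebook), then open the URL it prints, typically http://localhost:6006:
tensorboard --logdir tf_logs/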
In [49]:
best_theta
Out[49]:
In [50]:
reset_graph()
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
n_epochs = 1000
learning_rate = 0.01
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
In [51]:
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
In [52]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
In [53]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()
file_writer.flush()
file_writer.close()
print("Best theta:")
print(best_theta)
In [54]:
print(error.op.name)
In [55]:
print(mse.op.name)
In [56]:
reset_graph()
a1 = tf.Variable(0, name="a") # name == "a"
a2 = tf.Variable(0, name="a") # name == "a_1"
with tf.name_scope("param"):          # name == "param"
    a3 = tf.Variable(0, name="a")     # name == "param/a"

with tf.name_scope("param"):          # name == "param_1"
    a4 = tf.Variable(0, name="a")     # name == "param_1/a"

for node in (a1, a2, a3, a4):
    print(node.op.name)
Some ugly, flat code:
In [57]:
reset_graph()
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")
z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")
relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z1, 0., name="relu2") # Oops, cut&paste error! Did you spot it?
output = tf.add(relu1, relu2, name="output")
Much better, using a function to build the ReLUs:
In [58]:
reset_graph()
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, 0., name="relu")
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
In [59]:
file_writer = tf.summary.FileWriter("logs/relu1", tf.get_default_graph())
Even better, using name scopes:
In [60]:
reset_graph()
def relu(X):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)                          # not shown in the book
        w = tf.Variable(tf.random_normal(w_shape), name="weights")    # not shown
        b = tf.Variable(0.0, name="bias")                             # not shown
        z = tf.add(tf.matmul(X, w), b, name="z")                      # not shown
        return tf.maximum(z, 0., name="max")                          # not shown
In [61]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu2", tf.get_default_graph())
file_writer.close()
Sharing a threshold variable the classic way, by defining it outside of the relu() function then passing it as a parameter:
In [62]:
reset_graph()
def relu(X, threshold):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)                          # not shown in the book
        w = tf.Variable(tf.random_normal(w_shape), name="weights")    # not shown
        b = tf.Variable(0.0, name="bias")                             # not shown
        z = tf.add(tf.matmul(X, w), b, name="z")                      # not shown
        return tf.maximum(z, threshold, name="max")
threshold = tf.Variable(0.0, name="threshold")
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for i in range(5)]
output = tf.add_n(relus, name="output")
In [63]:
reset_graph()
def relu(X):
    with tf.name_scope("relu"):
        if not hasattr(relu, "threshold"):
            relu.threshold = tf.Variable(0.0, name="threshold")
        w_shape = int(X.get_shape()[1]), 1                            # not shown in the book
        w = tf.Variable(tf.random_normal(w_shape), name="weights")    # not shown
        b = tf.Variable(0.0, name="bias")                             # not shown
        z = tf.add(tf.matmul(X, w), b, name="z")                      # not shown
        return tf.maximum(z, relu.threshold, name="max")
In [64]:
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
In [65]:
reset_graph()
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))
In [66]:
with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")
In [67]:
with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")
In [68]:
reset_graph()
def relu(X):
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")
        w_shape = int(X.get_shape()[1]), 1                            # not shown
        w = tf.Variable(tf.random_normal(w_shape), name="weights")    # not shown
        b = tf.Variable(0.0, name="bias")                             # not shown
        z = tf.add(tf.matmul(X, w), b, name="z")                      # not shown
        return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")
In [69]:
file_writer = tf.summary.FileWriter("logs/relu6", tf.get_default_graph())
file_writer.close()
In [70]:
reset_graph()
def relu(X):
    with tf.variable_scope("relu"):
        threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("", default_name="") as scope:
    first_relu = relu(X)      # create the shared variable
    scope.reuse_variables()   # then reuse it
    relus = [first_relu] + [relu(X) for i in range(4)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu8", tf.get_default_graph())
file_writer.close()
In [71]:
reset_graph()
def relu(X):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))
    w_shape = (int(X.get_shape()[1]), 1)                              # not shown in the book
    w = tf.Variable(tf.random_normal(w_shape), name="weights")        # not shown
    b = tf.Variable(0.0, name="bias")                                 # not shown
    z = tf.add(tf.matmul(X, w), b, name="z")                          # not shown
    return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")
In [72]:
file_writer = tf.summary.FileWriter("logs/relu9", tf.get_default_graph())
file_writer.close()
In [73]:
reset_graph()
with tf.variable_scope("my_scope"):
    x0 = tf.get_variable("x", shape=(), initializer=tf.constant_initializer(0.))
    x1 = tf.Variable(0., name="x")
    x2 = tf.Variable(0., name="x")

with tf.variable_scope("my_scope", reuse=True):
    x3 = tf.get_variable("x")
    x4 = tf.Variable(0., name="x")

with tf.variable_scope("", default_name="", reuse=True):
    x5 = tf.get_variable("my_scope/x")
print("x0:", x0.op.name)
print("x1:", x1.op.name)
print("x2:", x2.op.name)
print("x3:", x3.op.name)
print("x4:", x4.op.name)
print("x5:", x5.op.name)
print(x0 is x3 and x3 is x5)
The first variable_scope() block first creates the shared variable x0, named my_scope/x. For all operations other than shared variables (including non-shared variables), the variable scope acts like a regular name scope, which is why the two variables x1 and x2 have a name with the prefix my_scope/. Note however that TensorFlow makes their names unique by adding an index: my_scope/x_1 and my_scope/x_2.
The second variable_scope() block reuses the shared variables in scope my_scope, which is why x0 is x3. Once again, for all operations other than shared variables it acts as a name scope, and since it's a separate block from the first one, the name of the scope is made unique by TensorFlow (my_scope_1), and thus the variable x4 is named my_scope_1/x.
The third block shows another way to get a handle on the shared variable my_scope/x: create a variable_scope() at the root scope (whose name is an empty string), then call get_variable() with the full name of the shared variable (i.e., "my_scope/x").
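To see why reuse matters (a small sketch, not in the book): asking get_variable() to create my_scope/x a second time without reuse=True raises a ValueError, which protects you from silently duplicating a shared variable:
try:
    with tf.variable_scope("my_scope"):
        tf.get_variable("x", shape=(), initializer=tf.constant_initializer(0.))
except ValueError as ex:
    print(ex)    # Variable my_scope/x already exists, disallowed...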
In [74]:
reset_graph()
text = np.array("Do you want some café?".split())
text_tensor = tf.constant(text)
with tf.Session() as sess:
print(text_tensor.eval())
In [75]:
class Const(object):
    def __init__(self, value):
        self.value = value
    def evaluate(self):
        return self.value
    def __str__(self):
        return str(self.value)

class Var(object):
    def __init__(self, init_value, name):
        self.value = init_value
        self.name = name
    def evaluate(self):
        return self.value
    def __str__(self):
        return self.name

class BinaryOperator(object):
    def __init__(self, a, b):
        self.a = a
        self.b = b

class Add(BinaryOperator):
    def evaluate(self):
        return self.a.evaluate() + self.b.evaluate()
    def __str__(self):
        return "{} + {}".format(self.a, self.b)

class Mul(BinaryOperator):
    def evaluate(self):
        return self.a.evaluate() * self.b.evaluate()
    def __str__(self):
        return "({}) * ({})".format(self.a, self.b)

x = Var(3, name="x")
y = Var(4, name="y")
f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))  # f(x,y) = x²y + y + 2
print("f(x,y) =", f)
print("f(3,4) =", f.evaluate())
In [76]:
df_dx = Mul(Const(2), Mul(x, y)) # df/dx = 2xy
df_dy = Add(Mul(x, x), Const(1)) # df/dy = x² + 1
print("df/dx(3,4) =", df_dx.evaluate())
print("df/dy(3,4) =", df_dy.evaluate())
In [77]:
def gradients(func, vars_list, eps=0.0001):
    partial_derivatives = []
    base_func_eval = func.evaluate()
    for var in vars_list:
        original_value = var.value
        var.value = var.value + eps
        tweaked_func_eval = func.evaluate()
        var.value = original_value
        derivative = (tweaked_func_eval - base_func_eval) / eps
        partial_derivatives.append(derivative)
    return partial_derivatives
df_dx, df_dy = gradients(f, [x, y])
print("df/dx(3,4) =", df_dx)
print("df/dy(3,4) =", df_dy)
In [78]:
Const.derive = lambda self, var: Const(0)
Var.derive = lambda self, var: Const(1) if self is var else Const(0)
Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))
Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))
x = Var(3.0, name="x")
y = Var(4.0, name="y")
f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2
df_dx = f.derive(x) # 2xy
df_dy = f.derive(y) # x² + 1
print("df/dx(3,4) =", df_dx.evaluate())
print("df/dy(3,4) =", df_dy.evaluate())
In [79]:
class DualNumber(object):
    def __init__(self, value=0.0, eps=0.0):
        self.value = value
        self.eps = eps
    def __add__(self, b):
        return DualNumber(self.value + self.to_dual(b).value,
                          self.eps + self.to_dual(b).eps)
    def __radd__(self, a):
        return self.to_dual(a).__add__(self)
    def __mul__(self, b):
        return DualNumber(self.value * self.to_dual(b).value,
                          self.eps * self.to_dual(b).value + self.value * self.to_dual(b).eps)
    def __rmul__(self, a):
        return self.to_dual(a).__mul__(self)
    def __str__(self):
        if self.eps:
            return "{:.1f} + {:.1f}ε".format(self.value, self.eps)
        else:
            return "{:.1f}".format(self.value)
    def __repr__(self):
        return str(self)
    @classmethod
    def to_dual(cls, n):
        if hasattr(n, "value"):
            return n
        else:
            return cls(n)
$3 + (3 + 4 \epsilon) = 6 + 4\epsilon$
In [80]:
3 + DualNumber(3, 4)
Out[80]:
$(3 + 4ε)\times(5 + 7ε) = 3 \times 5 + 3 \times 7ε + 4ε \times 5 + 4ε \times 7ε = 15 + 21ε + 20ε + 28ε^2 = 15 + 41ε + 28 \times 0 = 15 + 41ε$
In [81]:
DualNumber(3, 4) * DualNumber(5, 7)
Out[81]:
In [82]:
x.value = DualNumber(3.0)
y.value = DualNumber(4.0)
f.evaluate()
Out[82]:
In [83]:
x.value = DualNumber(3.0, 1.0) # 3 + ε
y.value = DualNumber(4.0) # 4
df_dx = f.evaluate().eps
x.value = DualNumber(3.0) # 3
y.value = DualNumber(4.0, 1.0) # 4 + ε
df_dy = f.evaluate().eps
In [84]:
df_dx
Out[84]:
In [85]:
df_dy
Out[85]:
In [86]:
class Const(object):
    def __init__(self, value):
        self.value = value
    def evaluate(self):
        return self.value
    def backpropagate(self, gradient):
        pass
    def __str__(self):
        return str(self.value)

class Var(object):
    def __init__(self, init_value, name):
        self.value = init_value
        self.name = name
        self.gradient = 0
    def evaluate(self):
        return self.value
    def backpropagate(self, gradient):
        self.gradient += gradient
    def __str__(self):
        return self.name

class BinaryOperator(object):
    def __init__(self, a, b):
        self.a = a
        self.b = b

class Add(BinaryOperator):
    def evaluate(self):
        self.value = self.a.evaluate() + self.b.evaluate()
        return self.value
    def backpropagate(self, gradient):
        self.a.backpropagate(gradient)
        self.b.backpropagate(gradient)
    def __str__(self):
        return "{} + {}".format(self.a, self.b)

class Mul(BinaryOperator):
    def evaluate(self):
        self.value = self.a.evaluate() * self.b.evaluate()
        return self.value
    def backpropagate(self, gradient):
        self.a.backpropagate(gradient * self.b.value)
        self.b.backpropagate(gradient * self.a.value)
    def __str__(self):
        return "({}) * ({})".format(self.a, self.b)

x = Var(3, name="x")
y = Var(4, name="y")
f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))  # f(x,y) = x²y + y + 2

result = f.evaluate()
f.backpropagate(1.0)

print("f(x,y) =", f)
print("f(3,4) =", result)
print("df_dx =", x.gradient)
print("df_dy =", y.gradient)
In [87]:
reset_graph()
x = tf.Variable(3., name="x")
y = tf.Variable(4., name="y")
f = x*x*y + y + 2
gradients = tf.gradients(f, [x, y])
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    f_val, gradients_val = sess.run([f, gradients])
f_val, gradients_val
Out[87]:
In [ ]: