Learning Objectives
Eager Execution
Eager mode evaluates operations immediately and returns concrete values. To enable eager mode, simply place tf.enable_eager_execution()
at the top of your code. We recommend using eager execution when prototyping as it is intuitive, easier to debug, and requires less boilerplate code.
Graph Execution
Graph mode is TensorFlow's default execution mode (although it will change to eager with TF 2.0). In graph mode, operations only produce a symbolic graph, which doesn't get executed until it is run within the context of a tf.Session(). This style of coding is less intuitive and has more boilerplate; however, it can lead to performance optimizations and is particularly suited for distributing training across multiple devices. We recommend using graph (delayed) execution for performance-sensitive production code.
In [ ]:
import tensorflow as tf
print(tf.__version__)
In [ ]:
# Switch TensorFlow to eager mode so operations return concrete values
# immediately. Per the notes above, this call belongs at the top of the program.
tf.enable_eager_execution()
In [ ]:
# Two int32 constant tensors built from Python lists.
a = tf.constant(value = [5, 3, 8], dtype = tf.int32)
b = tf.constant(value = [3, -1, 2], dtype = tf.int32)
# Add them with the explicit op; in eager mode the printed tensor already
# holds its concrete values.
c = tf.add(x = a, y = b)
print(c)
In [ ]:
# The + operator on tensors is equivalent to calling tf.add(a, b).
c = a + b # this is equivalent to tf.add(a,b)
print(c)
In [ ]:
import numpy as np

# The same pair of operands in three representations.
a_py = [1, 2]  # native python list
b_py = [3, 4]  # native python list

a_np = np.array(object=[1, 2])  # numpy array
b_np = np.array(object=[3, 4])  # numpy array

a_tf = tf.constant(value=[1, 2], dtype=tf.int32)  # native TF tensor
b_tf = tf.constant(value=[3, 4], dtype=tf.int32)  # native TF tensor

# Feed each representation to tf.add and show the type and value that
# come back.
operand_pairs = [(a_py, b_py), (a_np, b_np), (a_tf, b_tf)]
for lhs, rhs in operand_pairs:
    result = tf.add(x=lhs, y=rhs)
    print("Type: {}, Value: {}".format(type(result), result))
You can convert a native TF tensor to a NumPy array using .numpy()
In [ ]:
# Convert the TF tensor to a NumPy array; as the last expression in the
# cell, its value is what the notebook displays.
a_tf.numpy()
In [ ]:
# Toy dataset: inputs 1..10 as float32, targets on the line Y = 2X + 10.
X = tf.constant(value = [1,2,3,4,5,6,7,8,9,10], dtype = tf.float32)
Y = 2 * X + 10
print("X:{}".format(X))
print("Y:{}".format(Y))
In [ ]:
def loss_mse(X, Y, w0, w1):
    """Mean squared error of the linear model ``w0 * X + w1`` against ``Y``.

    Args:
        X: model inputs.
        Y: target values.
        w0: weight multiplying X.
        w1: additive weight.

    Returns:
        The mean of the squared differences between prediction and target.
    """
    residual = (w0 * X + w1) - Y
    return tf.reduce_mean(input_tensor=residual**2)
To use gradient descent, we need to take the partial derivative of the loss function with respect to each of the weights. We could manually compute the derivatives, but with TensorFlow's automatic differentiation capabilities we don't have to!
During gradient descent we think of the loss as a function of the parameters $w_0$ and $w_1$. Thus, we want to compute the partial derivative with respect to these variables. The params=[2,3]
argument tells TensorFlow to only compute derivatives with respect to the 2nd and 3rd arguments to the loss function (counting from 0, so really the 3rd and 4th).
In [ ]:
# Build a function that returns the gradients of loss_mse.
# params=[2,3] selects, counting from 0, the arguments at positions 2 and 3
# of loss_mse(X, Y, w0, w1), i.e. the weights w0 and w1.
grad_f = tf.contrib.eager.gradients_function(f = loss_mse, params=[2,3])
In [ ]:
STEPS = 1000
LEARNING_RATE = .02

# Both weights start from zero.
w0 = tf.constant(value=0.0, dtype=tf.float32)
w1 = tf.constant(value=0.0, dtype=tf.float32)

for step in range(STEPS):
    # 1. Gradient of the loss with respect to each weight.
    d_w0, d_w1 = grad_f(X, Y, w0, w1)

    # 2. Move each weight against its gradient, scaled by the learning rate.
    w0 -= d_w0 * LEARNING_RATE
    w1 -= d_w1 * LEARNING_RATE

    # 3. Report the MSE on every 100th step.
    if not step % 100:
        print("STEP: {} MSE: {}".format(step, loss_mse(X, Y, w0, w1)))

# Final MSE and the learned weights, rounded to 4 decimal places.
print("STEP: {} MSE: {}".format(STEPS,loss_mse(X, Y, w0, w1)))
print("w0:{}".format(round(float(w0), 4)))
print("w1:{}".format(round(float(w1), 4)))
Try modelling a non-linear function such as: $y=xe^{-x^2}$
In [ ]:
X = tf.constant(value = np.linspace(0,2,1000), dtype = tf.float32)
Y = X*np.exp(-X**2) * X
from matplotlib import pyplot as plt
%matplotlib inline
plt.plot(X, Y)
In [ ]:
def make_features(X):
    """Build the feature matrix for X by stacking derived columns on axis 1.

    The columns are, in order: X itself, a column of ones (the bias term),
    X squared, the square root of X, and exp(X).

    Args:
        X: input tensor.

    Returns:
        The five feature tensors stacked along axis 1.
    """
    columns = [
        X,
        tf.ones_like(X),  # Bias.
        tf.square(X),
        tf.sqrt(X),
        tf.exp(X),
    ]
    return tf.stack(columns, axis=1)
def make_weights(n_weights):
    """Return an all-zero float32 weight column of shape (n_weights, 1).

    Args:
        n_weights: number of weights to create.

    Returns:
        A float32 tensor of zeros with shape (n_weights, 1), the layout
        needed as the right-hand operand of a matrix product with a
        feature matrix that has n_weights columns.
    """
    # Allocate the zero column directly rather than stacking n scalar
    # constants and expanding dims; this also works when n_weights is 0.
    return tf.zeros(shape=[n_weights, 1], dtype=tf.float32)
def predict(X, W):
    """Return the model output for features X and weights W.

    Args:
        X: feature matrix.
        W: weight tensor, used as the right-hand operand of the product.

    Returns:
        The matrix product of X and W with its last axis squeezed away.
    """
    return tf.squeeze(tf.matmul(X, W), axis=-1)
def loss_mse(X, Y, W):
    """Mean squared error of ``predict(X, W)`` against the targets Y.

    Args:
        X: feature matrix passed to predict.
        Y: target values.
        W: weight tensor passed to predict.

    Returns:
        The mean of the squared differences between prediction and target.
    """
    residual = predict(X, W) - Y
    return tf.reduce_mean(input_tensor=residual**2)
# Inputs: 1000 evenly spaced points on [0, 2]; targets: y = x * exp(-x^2).
X = tf.constant(value = np.linspace(0,2,1000), dtype = tf.float32)
Y = np.exp(-X**2) * X
# params=[2] restricts differentiation to the argument at position 2
# (counting from 0) of loss_mse(X, Y, W), i.e. the weights W.
grad_f = tf.contrib.eager.gradients_function(f = loss_mse, params=[2])
In [ ]:
STEPS = 2000
LEARNING_RATE = .02

# Feature matrix for the inputs, and one zero-initialised weight per
# feature column.
Xf = make_features(X)
# Xf = Xf[:,0:2] # Linear features only.
W = make_weights(Xf.get_shape()[1].value)

# History of (step, loss) samples for the loss curve.
steps = []
losses = []
plt.figure()

for step in range(STEPS):
    # 1. Gradient of the loss with respect to the weights.
    dW = grad_f(Xf, Y, W)[0]

    # 2. Move the weights against the gradient.
    W -= dW * LEARNING_RATE

    # 3. Every 100th step, record the current MSE and redraw the curve.
    if not step % 100:
        loss = loss_mse(Xf, Y, W)
        steps.append(step)
        losses.append(loss)
        plt.clf()
        plt.plot(steps, losses)

# Final MSE after the last update.
print("STEP: {} MSE: {}".format(STEPS,loss_mse(Xf, Y, W)))

# Overlay the fitted curve on the true targets.
plt.figure()
plt.plot(X, Y, label='actual')
plt.plot(X, predict(Xf, W), label='predicted')
plt.legend()
Copyright 2019 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License