TensorFlow is a dynamic graph computation engine that allows automatic differentiation of each node. It is the default computational backend of the Keras library, and it can also be used directly from Python to build deep learning models.
TensorFlow builds a graph of computations whose nodes may be constants, variables, or operations on tensors.
Note that for this course we are using the new TensorFlow 2.0. This version cleaned up the old, cluttered API and uses a dynamic graph of operations by default, which makes it natural to design a model interactively in Jupyter. Previously, you defined the graph statically once and then had to evaluate it by feeding it some data. Now the graph is defined dynamically while executing imperative Python instructions, which means that you can print any tensor at any moment, or even use pdb.set_trace() to inspect intermediate values.
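Since operations execute eagerly, you can verify this directly (a minimal sketch, assuming TensorFlow 2.x is installed; if a compiled static graph is needed for performance, a function can still be traced with the tf.function decorator):
In [ ]:
# Minimal sketch of eager execution (assumes TensorFlow 2.x): every
# operation runs immediately, so intermediate values can be inspected.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
y = x * 2.0       # computed right away, no session or feed_dict needed
print(y.numpy())  # intermediate values are directly printable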
In [ ]:
import tensorflow as tf
a = tf.constant(3)
a
In [ ]:
b = tf.constant(2)
c = a + b  # executed eagerly: c already holds the result
c
In [ ]:
A = tf.constant([[0, 1], [2, 3]], dtype=tf.float32)
A
A tf.Tensor can be converted to a NumPy array as follows:
In [ ]:
A.numpy()
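Conversely, a NumPy array (or a Python list) can be wrapped into a tensor, for instance with tf.convert_to_tensor:
In [ ]:
# The reverse conversion: wrap a NumPy array into a tf.Tensor.
import numpy as np

tf.convert_to_tensor(np.array([[0., 1.], [2., 3.]]))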
In [ ]:
b = tf.Variable([1, 2], dtype=tf.float32)
b
In [ ]:
tf.reshape(b, (-1, 1))
In [ ]:
tf.matmul(A, tf.reshape(b, (-1, 1)))
In [ ]:
x = tf.Variable([1, -4], dtype=tf.float32)
x
In [ ]:
def squared_norm(x):
    # TODO: sum of the squared elements of x
    pass
In [ ]:
# %load solutions/tf_squared_norm.py
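If you prefer not to load the solution file, here is one possible implementation (a sketch; the loaded solution may differ):
In [ ]:
# One possible implementation (sketch; the loaded solution may differ):
def squared_norm(x):
    return tf.reduce_sum(x ** 2)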
In [ ]:
squared_norm(x)
In [ ]:
squared_norm(x).numpy()
In [ ]:
x = tf.Variable([1, -4], dtype=tf.float32)
with tf.GradientTape() as tape:
    result = squared_norm(x)
variables = [x]
gradients = tape.gradient(result, variables)
gradients
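As a sanity check: for f(x) = sum_i x_i ** 2 the gradient is 2 * x, so for x = [1, -4] we expect [2, -8]:
In [ ]:
# d/dx of sum(x_i ** 2) is 2 * x, so the expected gradient is [2., -8.]:
2 * x.numpy()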
In [ ]:
grad_x = gradients[0]
In [ ]:
x
We can modify x in place by applying one step of gradient descent:
In [ ]:
x.assign_sub(0.1 * grad_x)
x.numpy()
Execute the following gradient descent step several times in a row to watch the objective function decrease and the values of x converge to the minimum of the squared_norm function.
Hit [ctrl]-[enter] several times to execute the same Jupyter notebook cell over and over again.
In [ ]:
with tf.GradientTape() as tape:
    objective = squared_norm(x)
x.assign_sub(0.1 * tape.gradient(objective, [x])[0])
print(f"objective = {objective.numpy():e}")
print(f"x = {x.numpy()}")
To explicitly place tensors on a device, use context managers:
In [ ]:
with tf.device("CPU:0"):
x_cpu = tf.constant(3)
# with tf.device("GPU:0"):
# x_gpu = tf.constant(3)
x_cpu.device
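To check which devices are actually available, recent TF 2.x releases expose tf.config.list_physical_devices (earlier 2.x releases used tf.config.experimental.list_physical_devices):
In [ ]:
# List the devices TensorFlow can see (API available in recent TF 2.x):
tf.config.list_physical_devices()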
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
digits = load_digits()
sample_index = 45
plt.figure(figsize=(3, 3))
plt.imshow(digits.images[sample_index], cmap=plt.cm.gray_r,
           interpolation='nearest')
plt.title("image label: %d" % digits.target[sample_index]);
In [ ]:
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
data = np.asarray(digits.data, dtype='float32')
target = np.asarray(digits.target, dtype='int32')
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.15, random_state=37)
# mean = 0 ; standard deviation = 1.0
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# print(scaler.mean_)
# print(scaler.scale_)
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)
TensorFlow provides a dataset abstraction (tf.data.Dataset) that makes it easy to iterate over the data batch by batch:
In [ ]:
def gen_dataset(x, y, batch_size=128):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.shuffle(buffer_size=10000, seed=42)
    dataset = dataset.batch(batch_size=batch_size)
    return dataset
In [ ]:
dataset = gen_dataset(X_train, y_train)
dataset
In [ ]:
batch_x, batch_y = next(iter(dataset))
batch_x.shape
In [ ]:
batch_y.shape
In [ ]:
# Helper functions to test your randomly initialized model:

def init_weights(shape):
    return tf.Variable(tf.random.normal(shape, stddev=0.01))


def accuracy(y_pred, y):
    return np.mean(np.argmax(y_pred, axis=1) == y)


def test_model(model, x, y):
    dataset = gen_dataset(x, y)
    preds, targets = [], []
    for batch_x, batch_y in dataset:
        preds.append(model(batch_x).numpy())
        targets.append(batch_y.numpy())
    preds, targets = np.concatenate(preds), np.concatenate(targets)
    return accuracy(preds, targets)
Define your model below, then execute the following cells to train it. Don't hesitate to tweak the hyperparameters.
In [ ]:
# hyperparameters
batch_size = 32
hid_size = 15
learning_rate = 0.5
num_epochs = 10
input_size = X_train.shape[1]
output_size = 10


# build the model and initialize its weights
class MyModel:

    def __init__(self, input_size, hid_size, output_size):
        # TODO: randomly initialize all the internal variables of the model:
        self.W_h = None  # TODO
        self.b_h = None  # TODO
        self.W_o = None  # TODO
        self.b_o = None  # TODO

    def __call__(self, inputs):
        # TODO: this method should implement the forward pass with
        # TensorFlow operations: compute the outputs, that is the
        # unnormalized predictions of the network, for a given batch
        # of input vectors.
        # No need to implement the softmax operation here, as we will
        # move it to the loss function instead.
        # Hint: you can use tf.matmul, tf.tanh, tf.sigmoid,
        # arithmetic operations and so on.
        return None


model = MyModel(input_size, hid_size, output_size)
In [ ]:
# %load solutions/tf_model.py
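For reference, here is one possible implementation reusing the init_weights helper defined above (a sketch; the loaded solution may differ):
In [ ]:
# One possible implementation (sketch; the loaded solution may differ):
class MyModel:

    def __init__(self, input_size, hid_size, output_size):
        self.W_h = init_weights([input_size, hid_size])
        self.b_h = tf.Variable(tf.zeros([hid_size]))
        self.W_o = init_weights([hid_size, output_size])
        self.b_o = tf.Variable(tf.zeros([output_size]))

    def __call__(self, inputs):
        # hidden layer with tanh non-linearity, then a linear output
        # layer producing unnormalized logits:
        h = tf.tanh(tf.matmul(inputs, self.W_h) + self.b_h)
        return tf.matmul(h, self.W_o) + self.b_o


model = MyModel(input_size, hid_size, output_size)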
In [ ]:
test_model(model, X_test, y_test)
The following implements a training loop in Python. Note the use of tf.GradientTape to automatically compute the gradients of the loss w.r.t. the different parameters of the model:
In [ ]:
losses = []

for e in range(num_epochs):
    train_dataset = gen_dataset(X_train, y_train, batch_size=batch_size)
    for batch_x, batch_y in train_dataset:
        # tf.GradientTape records the activations needed to compute
        # the gradients:
        with tf.GradientTape() as tape:
            logits = model(batch_x)
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    batch_y, logits))
        losses.append(loss.numpy())

        # Ask the tape for the gradients dL/dW_h, dL/db_h, etc.:
        dW_h, db_h, dW_o, db_o = tape.gradient(
            loss, [model.W_h, model.b_h, model.W_o, model.b_o])

        # Update the weights as Stochastic Gradient Descent would do:
        model.W_h.assign_sub(learning_rate * dW_h)
        model.b_h.assign_sub(learning_rate * db_h)
        model.W_o.assign_sub(learning_rate * dW_o)
        model.b_o.assign_sub(learning_rate * db_o)

    train_acc = test_model(model, X_train, y_train)
    test_acc = test_model(model, X_test, y_test)
    print("Epoch {}: train_acc = {}, test_acc = {}".format(
        e, round(train_acc, 4), round(test_acc, 4)))

plt.plot(losses)
In [ ]:
test_model(model, X_test, y_test)
In [ ]:
test_model(model, X_train, y_train)
In [ ]: