In [1]:
#@title License header
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This notebook creates and trains a TensorFlow 2.0 model for recognizing handwritten digits using the MNIST dataset, then compiles and executes that trained model using IREE.
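To run this notebook locally, enable the IREE/TensorFlow integration with the build option `build --define=iree_tensorflow=true`,
then start the kernel with `python colab/start_colab_kernel.py`
(see that file for additional instructions).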
In [2]:
import os
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from pyiree.tf import compiler as ireec
from pyiree import rt as ireert
# Explicitly enable eager execution (already the default in TensorFlow 2.x).
tf.compat.v1.enable_eager_execution()

# Directory for artifacts written by this notebook. expanduser("~") resolves
# the home directory even when the HOME environment variable is not set
# (e.g. on Windows), where os.environ["HOME"] would raise KeyError. When HOME
# is set the resulting path is unchanged.
SAVE_PATH = os.path.join(os.path.expanduser("~"), "saved_models")
os.makedirs(SAVE_PATH, exist_ok=True)

# Print version information for future notebook users to reference.
print("TensorFlow version: ", tf.__version__)
print("Numpy version: ", np.__version__)
In [3]:
#@title Notebook settings { run: "auto" }
#@markdown -----
#@markdown ### Configuration
backend_choice = "GPU (vulkan-spirv)" #@param [ "GPU (vulkan-spirv)", "CPU (VMLA)" ]

# Translate the form selection into the (compiler backend, runtime driver)
# pair. Any selection other than the GPU entry uses the VMLA pair.
backend_name, driver_name = (
    ("vulkan-spirv", "vulkan") if backend_choice == "GPU (vulkan-spirv)"
    else ("vmla", "vmla"))
tf.print("Using IREE compiler backend '%s' and runtime driver '%s'" % (backend_name, driver_name))

#@markdown -----
#@markdown ### Training Parameters
#@markdown <sup>Batch size used to subdivide the training and evaluation samples</sup>
batch_size = 200 #@param { type: "slider", min: 10, max: 400 }
#@markdown <sup>Epochs for training/eval. Higher values take longer to run but generally produce more accurate models</sup>
num_epochs = 5 #@param { type: "slider", min: 1, max: 20 }
#@markdown -----
In [4]:
#@title Load MNIST dataset, setup training and evaluation
NUM_CLASSES = 10 # One per digit [0, 1, 2, ..., 9]
IMG_ROWS, IMG_COLS = 28, 28
input_shape = (IMG_ROWS, IMG_COLS, 1)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
tf.print("Loaded MNIST dataset!")

# Give every image an explicit single-channel axis, then rescale the pixel
# values from [0, 255] integers to [0.0, 1.0] floats.
x_train = x_train.reshape((-1,) + input_shape).astype("float32") / 255
x_test = x_test.reshape((-1,) + input_shape).astype("float32") / 255

# Number of whole batches available per pass (partial batches are dropped
# by the datasets built below).
steps_per_epoch = x_train.shape[0] // batch_size
steps_per_eval = x_test.shape[0] // batch_size

# Convert the integer class labels to one-hot (binary class matrix) form.
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

# Batched datasets for training/evaluation; drop_remainder keeps every batch
# at exactly `batch_size` examples.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(
    batch_size, drop_remainder=True)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(
    batch_size, drop_remainder=True)
# Single-machine distribution strategy, plus the distributed views of the
# two batched datasets that the training/evaluation steps consume.
strategy = tf.distribute.experimental.CentralStorageStrategy()
train_dist_ds = strategy.experimental_distribute_dataset(train_dataset)
test_dist_ds = strategy.experimental_distribute_dataset(test_dataset)

tf.print("Configured data for training and evaluation!")
# Each entry pairs a format template with the single value it reports.
summary_lines = (
    (" sample shape: %s", str(x_train[0].shape)),
    (" training samples: %s", x_train.shape[0]),
    (" test samples: %s", x_test.shape[0]),
    (" epochs: %s", num_epochs),
    (" steps/epoch: %s", steps_per_epoch),
    (" steps/eval : %s", steps_per_eval),
)
for template, value in summary_lines:
  tf.print(template % value)
tf.print("")
tf.print("Sample image from the dataset:")

# Pick one test example; it is reused later as the input for the
# TensorFlow and IREE predictions.
SAMPLE_EXAMPLE_INDEX = 1
sample_image = x_test[SAMPLE_EXAMPLE_INDEX]
sample_label = y_test[SAMPLE_EXAMPLE_INDEX]
# Prepend a batch axis of size 1 so the image can be fed to the model.
sample_image_batch = sample_image[np.newaxis, ...]

# Drop the trailing channel axis to get a 2-D array for display.
plt.imshow(sample_image[:, :, 0])
plt.show()
tf.print("\nGround truth labels: %s" % str(sample_label))
In [5]:
#@title Define MNIST model architecture using tf.keras API
def simple_mnist_model(input_shape, num_classes=10, hidden_units=128):
  """Creates a simple (multi-layer perceptron) MNIST model.

  Args:
    input_shape: Shape of a single input example, without the batch
      dimension (e.g. `(28, 28, 1)`).
    num_classes: Width of the final softmax layer, i.e. the number of classes
      scored. Defaults to 10 (one per digit).
    hidden_units: Number of neurons in the hidden fully-connected layer.
      Defaults to 128.

  Returns:
    An uncompiled `tf.keras.models.Sequential` model whose output is a
    per-class probability vector.
  """
  return tf.keras.models.Sequential([
      # Flatten to a 1d array (e.g. 28x28 -> 784)
      tf.keras.layers.Flatten(input_shape=input_shape),
      # Fully-connected hidden layer with RELU activation
      tf.keras.layers.Dense(hidden_units, activation='relu'),
      # Fully-connected layer returning probability scores for each class
      tf.keras.layers.Dense(num_classes, activation='softmax'),
  ])
In [6]:
#@title Train the Keras model
# Build the model, the optimizer and the running metrics inside the
# distribution strategy's scope.
with strategy.scope():
  model = simple_mnist_model(input_shape)
  tf.print("Constructed Keras MNIST model, training...")

  optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)

  # Running averages / accuracies, reported and reset once per epoch.
  training_loss = tf.keras.metrics.Mean(
      name="training_loss", dtype=tf.float32)
  training_accuracy = tf.keras.metrics.CategoricalAccuracy(
      name="training_accuracy", dtype=tf.float32)
  test_loss = tf.keras.metrics.Mean(
      name="test_loss", dtype=tf.float32)
  test_accuracy = tf.keras.metrics.CategoricalAccuracy(
      name="test_accuracy", dtype=tf.float32)
@tf.function
def train_step(iterator):
  """Runs one training step on the next batch pulled from `iterator`.

  Args:
    iterator: Iterator yielding `(images, labels)` batches.
  """

  def step_fn(inputs):
    """Per-replica step: forward pass, gradient update, metric update."""
    batch_images, batch_labels = inputs
    with tf.GradientTape() as tape:
      # The model ends in a softmax layer, so these are class probabilities.
      probabilities = model(batch_images, training=True)
      per_example_loss = tf.keras.losses.categorical_crossentropy(
          batch_labels, probabilities)
      # Batch-mean loss divided by the number of replicas in sync.
      loss = tf.reduce_mean(per_example_loss) / strategy.num_replicas_in_sync
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    training_loss.update_state(loss)
    training_accuracy.update_state(batch_labels, probabilities)

  strategy.run(step_fn, args=(next(iterator),))
@tf.function
def test_step(iterator):
  """Runs one evaluation step on the next batch pulled from `iterator`.

  Args:
    iterator: Iterator yielding `(images, labels)` batches.
  """

  def step_fn(inputs):
    """Per-replica step: forward pass and metric update (no gradients)."""
    batch_images, batch_labels = inputs
    # The model ends in a softmax layer, so these are class probabilities.
    probabilities = model(batch_images, training=False)
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        batch_labels, probabilities)
    # Batch-mean loss divided by the number of replicas in sync.
    loss = tf.reduce_mean(per_example_loss) / strategy.num_replicas_in_sync
    test_loss.update_state(loss)
    test_accuracy.update_state(batch_labels, probabilities)

  strategy.run(step_fn, args=(next(iterator),))
def _run_steps(step_function, distributed_dataset, num_steps):
  """Calls `step_function` `num_steps` times on a fresh dataset iterator."""
  batch_iterator = iter(distributed_dataset)
  for _ in range(num_steps):
    step_function(batch_iterator)


def _report_and_reset(template, loss_metric, accuracy_metric):
  """Prints loss and accuracy (as a percentage), then clears both metrics."""
  tf.print(template % (loss_metric.result(), accuracy_metric.result() * 100))
  loss_metric.reset_states()
  accuracy_metric.reset_states()


# Each epoch is one full training pass followed by one full evaluation pass,
# with the accumulated metrics reported and cleared after each pass.
for epoch in range(num_epochs):
  tf.print("Running epoch #%s" % (epoch + 1))

  _run_steps(train_step, train_dist_ds, steps_per_epoch)
  _report_and_reset(
      " Training loss: %f, accuracy: %f", training_loss, training_accuracy)

  _run_steps(test_step, test_dist_ds, steps_per_eval)
  _report_and_reset(
      " Test loss : %f, accuracy: %f", test_loss, test_accuracy)

tf.print("Completed training!")
tf.print("")

# Run the trained model once on the sample image; the result is kept in
# `tf_prediction` and printed scaled by 100.
tf_prediction = model(sample_image_batch, training=False)
tf.print("Sample prediction:")
tf.print(tf_prediction[0] * 100.0, summarize=100)
tf.print("")
In [7]:
#@title Export the trained model as a SavedModel, with IREE-compatible settings
saved_model_dir = "/tmp/mnist.sm"

# The model was written in sequential style, so it is attached to a plain
# tf.Module, which is the object that gets saved.
inference_module = tf.Module()
inference_module.model = model

# Hack: Convert to static shape by pinning the batch dimension to 1.
# Won't be necessary once dynamic shapes are in.
model_input = model.inputs[0]
dynamic_input_shape = [1] + list(model_input.shape)[1:]

# Wrap the inference call in a tf.function with a fixed input signature and
# expose it on the module as `predict`.
predict_signature = [tf.TensorSpec(dynamic_input_shape, model_input.dtype)]
inference_module.predict = tf.function(
    lambda x: model.call(x, training=False),
    input_signature=predict_signature)

save_options = tf.saved_model.SaveOptions(save_debug_info=True)
tf.print("Exporting SavedModel to %s" % saved_model_dir)
tf.saved_model.save(inference_module, saved_model_dir, options=save_options)
In [8]:
#@title Load the SavedModel into IREE's compiler as MLIR mhlo
# Import only the `predict` function from the SavedModel.
exported_function_names = ["predict"]
compiler_module = ireec.tf_load_saved_model(
    saved_model_dir, exported_names=exported_function_names)
tf.print("Imported MLIR:\n", compiler_module.to_asm(large_element_limit=100))

# Also write the assembly to a file, so it can be used outside of this
# notebook. This call to to_asm() passes no element limit.
mnist_mlir_path = os.path.join(SAVE_PATH, "mnist.mlir")
with open(mnist_mlir_path, "wt") as output_file:
  output_file.write(compiler_module.to_asm())
print("Wrote MLIR to path '%s'" % mnist_mlir_path)
In [9]:
#@title Compile the mhlo MLIR and prepare a context to execute it
# Compile the imported MLIR for the selected backend, then wrap the
# resulting flatbuffer in a VM module that the runtime can load.
target_backends = [backend_name]
flatbuffer_blob = compiler_module.compile(target_backends=target_backends)
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)

# Create a runtime context for the selected driver and register the
# compiled module with it.
config = ireert.Config(driver_name)
ctx = ireert.SystemContext(config=config)
ctx.add_module(vm_module)
In [10]:
#@title Execute the compiled module and compare the results with TensorFlow
# Run the compiled `predict` function on the same single-image batch that
# was fed to the TensorFlow model.
iree_prediction = ctx.modules.module.predict(sample_image_batch)
iree_scaled = tf.convert_to_tensor(iree_prediction[0]) * 100.0
tf_scaled = tf_prediction[0] * 100.0

# Print both results, scaled identically, one after the other so they can be
# compared by eye.
tf.print("IREE prediction ('%s' backend, '%s' driver):" % (backend_name, driver_name))
tf.print(iree_scaled, summarize=100)
tf.print("")
tf.print("TensorFlow prediction:")
tf.print(tf_scaled, summarize=100)