In [0]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This tutorial demonstrates AI Platform's CloudTuner service.
CloudTuner is an implementation of KerasTuner that uses CAIP Optimizer Beta (Vizier as the backend) as its oracle to get suggested trials, run trials, and so on. CloudTuner is used in the same way as KerasTuner, and it additionally accepts an Optimizer study_config as an alternative way to define the search space.
This tutorial uses billable components of Google Cloud:
Learn about AI Platform Training pricing and Cloud Storage pricing, and use the Pricing Calculator to generate a cost estimate based on your projected usage.
Install additional dependencies not installed in the notebook environment.
In [0]:
! pip install google-cloud
! pip install google-cloud-storage
! pip install requests
! pip install tensorflow_datasets
The following steps are required, regardless of your notebook environment.
Select or create a Google Cloud project. When you first create an account, you get a $300 free credit towards your compute/storage costs.
If running locally on your own machine, you will need to install the Google Cloud SDK.
Note: Jupyter runs lines prefixed with ! as shell commands, and it interpolates Python variables prefixed with $ into these commands.
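For example, here is a minimal sketch of that interpolation (EXAMPLE_BUCKET is purely illustrative and is not used elsewhere in this tutorial):
In [0]:
# A regular Python variable...
EXAMPLE_BUCKET = 'my-example-bucket'
# ...interpolated into a shell command: the `!` prefix runs the line in a
# shell, and `$EXAMPLE_BUCKET` is replaced with the variable's value first.
! echo gs://$EXAMPLE_BUCKET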
If you are using AI Platform Notebooks, your environment is already authenticated. Skip these steps.
In [0]:
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.
if 'google.colab' in sys.modules:
  from google.colab import auth as google_auth
  google_auth.authenticate_user()

# If you are running this tutorial in a notebook locally, replace the string
# below with the path to your service account key and run this cell to
# authenticate your Google Cloud account.
else:
  %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

  # Log in to your account on Google Cloud
  ! gcloud auth application-default login
  ! gcloud auth login
In [0]:
! pip install tensorflow-enterprise-addons
In [0]:
# Restart the kernel after pip installs
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)
In [0]:
from tensorflow_enterprise_addons import cloudtuner
import kerastuner
REGION = 'us-central1'
PROJECT_ID = '[your-project-id]' #@param {type:"string"}
! gcloud config set project $PROJECT_ID
In [0]:
from tensorflow.keras.datasets import mnist
(x, y), (val_x, val_y) = mnist.load_data()
x = x.astype('float32') / 255.
val_x = val_x.astype('float32') / 255.
x = x[:10000]
y = y[:10000]
Next, we will define the model-building function with tunable hyperparameters, just as one does for KerasTuner. The following are tunable:
- the number of layers
- the learning rate
Note that CloudTuner does not support declaring hyperparameters inside the model-building function. Instead, the search space is configured by passing a hyperparameters argument when constructing the tuner, as sketched below.
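To make the contrast with plain KerasTuner concrete, here is a minimal sketch (illustration only; the actual search space for this tutorial is configured a few cells below):
In [0]:
import kerastuner

# With plain KerasTuner you would typically *declare* hyperparameters inside
# build_model, e.g. `units = hp.Int('units', 32, 128)`.
# With CloudTuner, declare the ranges up front on a HyperParameters object...
sketch_hps = kerastuner.engine.hyperparameters.HyperParameters()
sketch_hps.Int('num_layers', 2, 10)
sketch_hps.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
# ...and only *read* them inside the model-building function via `hp.get(...)`.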
In [0]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam
def build_model(hp):
  model = Sequential()
  model.add(Flatten(input_shape=(28, 28)))

  # the number of layers is tunable
  for _ in range(hp.get('num_layers')):
    model.add(Dense(units=64, activation='relu'))
  model.add(Dense(10, activation='softmax'))

  # the learning rate is tunable
  model.compile(
      optimizer=Adam(lr=hp.get('learning_rate')),
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'])
  return model
Next, we instantiate a CloudTuner. We define our tuning hyperparameters and pass them to the constructor as the hyperparameters parameter.
We also set the objective ('accuracy') used to measure the performance of each trial, and we keep the number of trials small (5) for the purpose of this demonstration.
In [0]:
# Configure the search space
HPS = kerastuner.engine.hyperparameters.HyperParameters()
HPS.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
HPS.Int('num_layers', 2, 10)

tuner = cloudtuner.CloudTuner(
    build_model,
    project_id=PROJECT_ID,
    region=REGION,
    objective='accuracy',
    hyperparameters=HPS,
    max_trials=5,
    directory='tmp_dir/1')
Let's use the search_space_summary() method to display what the search space for this optimization study looks like.
In [0]:
tuner.search_space_summary()
In [0]:
tuner.search(x=x, y=y, epochs=10, validation_data=(val_x, val_y))
In [0]:
tuner.results_summary()
Now, let's get the best model from the study using the get_best_models() method. The num_models parameter specifies how many of the top models to return; here we set it to 1 for the single best model. The method returns a list of models, so we use index 0 to take the model out of the list.
In [0]:
model = tuner.get_best_models(num_models=1)[0]
In [0]:
print(model)
print(model.weights)
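Because CloudTuner follows the standard KerasTuner Tuner interface, you can also inspect the winning hyperparameter values directly. The following is a sketch assuming the get_best_hyperparameters() method inherited from KerasTuner behaves as usual:
In [0]:
# Retrieve the best trial's hyperparameter values via the inherited
# KerasTuner API, then read the individual tuned values.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get('num_layers'))
print(best_hps.get('learning_rate'))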
In this example we will build a training pipeline that uses tf.data.Dataset for training the model.
In [0]:
import tensorflow as tf
import tensorflow_datasets as tfds
In [0]:
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
# tfds.load introduces a new logger, which results in duplicate log messages.
# To mitigate this issue, the following removes the Jupyter notebook root logger handlers. More details at
# https://stackoverflow.com/questions/6729268/log-messages-appearing-twice-with-python-logging
import logging
logger = logging.getLogger()
logger.handlers = []
Build a training and evaluation pipeline using ds.map, ds.cache, ds.shuffle, ds.batch, and ds.prefetch. For more details on building high-performance pipelines, refer to the data performance guide.
In [0]:
def normalize_img(image, label):
  """Normalizes images: `uint8` -> `float32`."""
  return tf.cast(image, tf.float32) / 255., label


ds_train = ds_train.map(
    normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(128)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

ds_test = ds_test.map(
    normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.batch(128)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)
In [0]:
def build_pipeline_model(hp):
  model = Sequential()
  model.add(Flatten(input_shape=(28, 28, 1)))

  # the number of layers is tunable
  for _ in range(hp.get('num_layers')):
    model.add(Dense(units=64, activation='relu'))
  model.add(Dense(10, activation='softmax'))

  # the learning rate is tunable
  model.compile(
      optimizer=Adam(lr=hp.get('learning_rate')),
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'])
  return model
In [0]:
# Configure the search space
pipeline_HPS = kerastuner.engine.hyperparameters.HyperParameters()
pipeline_HPS.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
pipeline_HPS.Int('num_layers', 2, 10)

pipeline_tuner = cloudtuner.CloudTuner(
    build_pipeline_model,
    project_id=PROJECT_ID,
    region=REGION,
    objective='accuracy',
    hyperparameters=pipeline_HPS,
    max_trials=5,
    directory='tmp_dir/2')
In [0]:
pipeline_tuner.search(x=ds_train, epochs=10, validation_data=ds_test)
In [0]:
pipeline_tuner.results_summary()
In [0]:
pipeline_model = pipeline_tuner.get_best_models(num_models=1)[0]
print(pipeline_model)
print(pipeline_model.weights)
Now, let's repeat this study, but this time the search space is passed in as an Optimizer study_config.
Let's start by constructing the study config for optimizing the accuracy of the model. As before, the number of layers and the learning rate are tunable; this config additionally declares a units parameter.
In [0]:
# Configure the search space
STUDY_CONFIG = {
    'algorithm': 'ALGORITHM_UNSPECIFIED',
    'metrics': [{
        'goal': 'MAXIMIZE',
        'metric': 'accuracy'
    }],
    'parameters': [{
        'discrete_value_spec': {
            'values': [0.0001, 0.001, 0.01]
        },
        'parameter': 'learning_rate',
        'type': 'DISCRETE'
    }, {
        'integer_value_spec': {
            'max_value': 10,
            'min_value': 2
        },
        'parameter': 'num_layers',
        'type': 'INTEGER'
    }, {
        'discrete_value_spec': {
            'values': [32, 64, 96, 128]
        },
        'parameter': 'units',
        'type': 'DISCRETE'
    }],
    'automatedStoppingConfig': {
        'decayCurveStoppingConfig': {
            'useElapsedTime': True
        }
    }
}
In [0]:
tuner = cloudtuner.CloudTuner(
    build_model,
    project_id=PROJECT_ID,
    region=REGION,
    study_config=STUDY_CONFIG,
    max_trials=10,
    directory='tmp_dir/3')
Let's use the search_space_summary() method to display what the search space for this optimization study looks like.
In [0]:
tuner.search_space_summary()
In [0]:
tuner.search(x=x, y=y, epochs=5, steps_per_epoch=2000, validation_steps=1000, validation_data=(val_x, val_y))
In [0]:
tuner.results_summary()
In [0]:
from multiprocessing.dummy import Pool
# If you are running this tutorial in a notebook locally, you may run multiple
# tuning loops concurrently using multiple processes instead of multiple threads.
# from multiprocessing import Pool
import time
import datetime

STUDY_ID = 'CloudTuner_study_{}'.format(
    datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))


def single_tuner(tuner_id):
  """Instantiates a `CloudTuner` and sets up its `tuner_id`.

  Args:
    tuner_id: Integer.

  Returns:
    A CloudTuner.
  """
  tuner = cloudtuner.CloudTuner(
      build_model,
      project_id=PROJECT_ID,
      region=REGION,
      objective='accuracy',
      hyperparameters=HPS,
      max_trials=18,
      study_id=STUDY_ID,
      directory=('tmp_dir/cloud/%s' % (STUDY_ID)))
  tuner.tuner_id = str(tuner_id)
  return tuner


def search_fn(tuner):
  # Start searching from different time points for each worker to avoid
  # `model.build` collisions.
  time.sleep(int(tuner.tuner_id) * 2)
  tuner.search(x=x, y=y, epochs=5, validation_data=(val_x, val_y), verbose=0)
  return tuner
In [0]:
# Number of search loops we would like to run in parallel
num_parallel_trials = 4
tuners = [single_tuner(i) for i in range(num_parallel_trials)]
p = Pool(processes=num_parallel_trials)
result = p.map(search_fn, tuners)
p.close()
p.join()
In [0]:
result[0].results_summary()
To clean up all Google Cloud resources used in this project, you can delete the Google Cloud project you used for the tutorial.
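For example, assuming you want to remove the project from the command line, a sketch like the following should work (deleting a project is irreversible and removes every resource in it, so double-check PROJECT_ID first):
In [0]:
# Permanently delete the tutorial project and all resources inside it.
! gcloud projects delete $PROJECT_ID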