This notebook illustrates:
In [ ]:
# change these to try this notebook out
BUCKET = 'cloud-training-demos-ml'
PROJECT = 'cloud-training-demos'
REGION = 'us-central1'
In [ ]:
import os
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION
In [ ]:
%%bash
if ! gsutil ls | grep -q gs://${BUCKET}/; then
gsutil mb -l ${REGION} gs://${BUCKET}
fi
In [ ]:
import shutil
import numpy as np
import tensorflow as tf
In [ ]:
# Determine CSV, label, and key columns
CSV_COLUMNS = 'weight_pounds,is_male,mother_age,plurality,gestation_weeks,key'.split(',')
LABEL_COLUMN = 'weight_pounds'
KEY_COLUMN = 'key'
# Set default values for each CSV column
DEFAULTS = [[0.0], ['null'], [0.0], ['null'], [0.0], ['nokey']]
TRAIN_STEPS = 1000
In [ ]:
# Create an input function reading a file using the Dataset API
# Then provide the results to the Estimator API
def read_dataset(filename_pattern, mode, batch_size = 512):
def _input_fn():
def decode_csv(line_of_text):
# TODO #1: Use tf.decode_csv to parse the provided line
# TODO #2: Make a Python dict. The keys are the column names, the values are from the parsed data
# TODO #3: Return a tuple of features, label where features is a Python dict and label a float
return features, label
# TODO #4: Use tf.gfile.Glob to create list of files that match pattern
file_list = None
# Create dataset from file list
dataset = (tf.data.TextLineDataset(file_list) # Read text file
.map(decode_csv)) # Transform each elem by applying decode_csv fn
# TODO #5: In training mode, shuffle the dataset and repeat indefinitely
# (Look at the API for tf.data.dataset shuffle)
# The mode input variable will be tf.estimator.ModeKeys.TRAIN if in training mode
# Tell the dataset to provide data in batches of batch_size
# This will now return batches of features, label
return dataset
return _input_fn
In [ ]:
# Define feature columns
In [ ]:
# Create serving input function to be able to serve predictions later using provided inputs
def serving_input_fn():
feature_placeholders = {
'is_male': tf.placeholder(tf.string, [None]),
'mother_age': tf.placeholder(tf.float32, [None]),
'plurality': tf.placeholder(tf.string, [None]),
'gestation_weeks': tf.placeholder(tf.float32, [None])
}
features = {
key: tf.expand_dims(tensor, -1)
for key, tensor in feature_placeholders.items()
}
return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)
In [ ]:
# Create estimator to train and evaluate
def train_and_evaluate(output_dir):
EVAL_INTERVAL = 300
run_config = tf.estimator.RunConfig(save_checkpoints_secs = EVAL_INTERVAL,
keep_checkpoint_max = 3)
# TODO #1: Create your estimator
estimator = None
train_spec = tf.estimator.TrainSpec(
# TODO #2: Call read_dataset passing in the training CSV file and the appropriate mode
input_fn = None,
max_steps = TRAIN_STEPS)
exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
eval_spec = tf.estimator.EvalSpec(
# TODO #3: Call read_dataset passing in the evaluation CSV file and the appropriate mode
input_fn = None,
steps = None,
start_delay_secs = 60, # start evaluating after N seconds
throttle_secs = EVAL_INTERVAL, # evaluate every N seconds
exporters = exporter)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Finally, train!
In [ ]:
# Run the model
shutil.rmtree('babyweight_trained', ignore_errors = True) # start fresh each time
tf.summary.FileWriterCache.clear() # ensure filewriter cache is clear for TensorBoard events file
train_and_evaluate('babyweight_trained')
The exporter directory contains the final model.
To begin TensorBoard from within AI Platform Notebooks, click the + symbol in the top left corner and select the Tensorboard icon to create a new TensorBoard.
In TensorBoard, look at the learned embeddings. Are they getting clustered? How about the weights for the hidden layers? What if you run this longer? What happens if you change the batchsize?
Copyright 2017-2018 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License