This notebook illustrates how to train, evaluate, and export a TensorFlow model using the high-level Estimator API.
In [ ]:
# Recursively give the jupyter user and group ownership of everything under
# /home/jupyter/training-data-analyst (shell command run with sudo).
!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst
In [ ]:
# Check that TensorFlow 2.1 is installed: this prints every `pip freeze` entry
# that matches "tensorflow==2.1" and prints nothing when there is no match.
# It only reports what is installed; it does not install or upgrade anything.
!pip freeze | grep tensorflow==2.1
In [ ]:
# Change these to try this notebook out.
#   BUCKET:  bucket name; the bash cell below addresses it as gs://${BUCKET}.
#   PROJECT: exported to the environment as PROJECT
#            (presumably the Google Cloud project ID - confirm).
#   REGION:  location passed to `gsutil mb -l` if the bucket has to be created.
BUCKET = 'cloud-training-demos-ml'
PROJECT = 'cloud-training-demos'
REGION = 'us-central1'
In [ ]:
import os
# Export the settings as environment variables so that %%bash cells can read
# them as ${BUCKET}, ${PROJECT} and ${REGION}.
os.environ.update(BUCKET=BUCKET, PROJECT=PROJECT, REGION=REGION)
In [ ]:
%%bash
# Create the bucket unless its URL already appears in the `gsutil ls` output.
# -F makes grep match the URL as a literal string (bucket names may contain
# dots, which would otherwise act as regex wildcards), and the quotes keep
# each expansion a single word.
if ! gsutil ls | grep -qF "gs://${BUCKET}/"; then
  gsutil mb -l "${REGION}" "gs://${BUCKET}"
fi
In [ ]:
%%bash
# List the CSV files in the current working directory; training later reads
# 'train.csv' and 'eval.csv' from here.
ls *.csv
First, write an input_fn to read the data.
In [ ]:
import shutil
import numpy as np
import tensorflow as tf
# Print the version of the TensorFlow package that was just imported.
print(tf.__version__)
In [ ]:
# Column layout of the CSV files, in file order
CSV_COLUMNS = [
    'weight_pounds',
    'is_male',
    'mother_age',
    'plurality',
    'gestation_weeks',
    'key',
]
# Column that holds the value to predict
LABEL_COLUMN = 'weight_pounds'
# Column that holds the per-row key
KEY_COLUMN = 'key'

# Default value for each CSV column, one single-element list per column and in
# the same order as CSV_COLUMNS (passed as record_defaults when decoding rows)
DEFAULTS = [
    [0.0],      # weight_pounds
    ['null'],   # is_male
    [0.0],      # mother_age
    ['null'],   # plurality
    [0.0],      # gestation_weeks
    ['nokey'],  # key
]

# Upper bound on the number of training steps
TRAIN_STEPS = 1000
In [ ]:
# Build an input function for the Estimator API on top of the Dataset API
def read_dataset(filename, mode, batch_size = 512):
    """Return a zero-argument input function that reads CSV rows as batches.

    Args:
        filename: File name or glob pattern of the CSV file(s) to read.
        mode: A tf.estimator.ModeKeys value. For TRAIN the rows are shuffled
            (buffer of 10 * batch_size) and repeated indefinitely; for any
            other mode the data is read exactly once.
        batch_size: Number of rows per batch.

    Returns:
        A callable taking no arguments that builds and returns a dataset of
        (features, label) batches. `features` maps every CSV column except
        LABEL_COLUMN to its tensor; `label` is the LABEL_COLUMN tensor.
    """
    def _parse_line(csv_line):
        # Split one text line into columns, then separate the label from the rest
        fields = tf.compat.v1.decode_csv(csv_line, record_defaults=DEFAULTS)
        parsed = dict(zip(CSV_COLUMNS, fields))
        target = parsed.pop(LABEL_COLUMN)
        return parsed, target

    def _input_fn():
        # Expand the pattern into the concrete list of files
        matching_files = tf.compat.v1.gfile.Glob(filename)

        # One dataset element per text line, parsed into (features, label)
        lines = tf.compat.v1.data.TextLineDataset(matching_files)
        examples = lines.map(_parse_line)

        training = mode == tf.estimator.ModeKeys.TRAIN
        if training:
            examples = examples.shuffle(buffer_size=10*batch_size)

        # None repeats indefinitely; 1 signals end-of-input after a single pass
        repeat_count = None if training else 1
        return examples.repeat(repeat_count).batch(batch_size)

    return _input_fn
Next, define the feature columns.
In [ ]:
# Feature column helpers
def get_categorical(name, values):
    """Return an indicator column over a fixed vocabulary.

    Args:
        name: Key of the feature in the features dict.
        values: List of vocabulary entries for that feature.

    Returns:
        An indicator column wrapping a vocabulary-list categorical column.
    """
    vocabulary_column = tf.feature_column.categorical_column_with_vocabulary_list(
        name, values)
    return tf.feature_column.indicator_column(vocabulary_column)
def get_cols():
    """Return the list of feature columns fed to the model.

    The list holds, in order: the categorical 'is_male' column, the numeric
    'mother_age' column, the categorical 'plurality' column and the numeric
    'gestation_weeks' column.
    """
    is_male_vocabulary = ['True', 'False', 'Unknown']
    plurality_vocabulary = [
        'Single(1)', 'Twins(2)', 'Triplets(3)',
        'Quadruplets(4)', 'Quintuplets(5)', 'Multiple(2+)',
    ]

    is_male = get_categorical('is_male', is_male_vocabulary)
    mother_age = tf.feature_column.numeric_column('mother_age')
    plurality = get_categorical('plurality', plurality_vocabulary)
    gestation_weeks = tf.feature_column.numeric_column('gestation_weeks')
    return [is_male, mother_age, plurality, gestation_weeks]
To predict with the TensorFlow model, we also need a serving input function. We will want all the inputs from our user.
In [ ]:
# Serving input function: describes the inputs accepted at prediction time
def serving_input_fn():
    """Return a ServingInputReceiver for the four model input features.

    Each input is received through a placeholder of shape [None]; before being
    handed to the model, every tensor gets a trailing dimension appended via
    tf.expand_dims(tensor, -1).

    Returns:
        A tf.estimator.export.ServingInputReceiver whose features are the
        expanded tensors and whose receiver tensors are the raw placeholders.
    """
    def _batch_placeholder(dtype):
        # One value per instance; the number of instances is left open
        return tf.compat.v1.placeholder(dtype, [None])

    receiver_tensors = {
        'is_male': _batch_placeholder(tf.string),
        'mother_age': _batch_placeholder(tf.float32),
        'plurality': _batch_placeholder(tf.string),
        'gestation_weeks': _batch_placeholder(tf.float32),
    }

    features = {}
    for name, placeholder in receiver_tensors.items():
        features[name] = tf.expand_dims(placeholder, -1)

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
In [ ]:
# Build the estimator, then train, evaluate and export it
def train_and_evaluate(output_dir):
    """Train and evaluate a DNNRegressor, writing its output to output_dir.

    Training reads 'train.csv' for at most TRAIN_STEPS steps. Evaluation reads
    'eval.csv' with steps=None, starts no sooner than 60 seconds in, and is
    throttled to one run per interval. A LatestExporter named 'exporter'
    exports the model using serving_input_fn.

    Args:
        output_dir: Directory used as the estimator's model_dir.
    """
    # A single interval drives both checkpoint saving and evaluation throttling
    interval_secs = 300

    run_config = tf.estimator.RunConfig(
        save_checkpoints_secs=interval_secs,
        keep_checkpoint_max=3)
    estimator = tf.estimator.DNNRegressor(
        model_dir=output_dir,
        feature_columns=get_cols(),
        hidden_units=[64, 32],
        config=run_config)

    train_input_fn = read_dataset('train.csv', mode=tf.estimator.ModeKeys.TRAIN)
    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn,
        max_steps=TRAIN_STEPS)

    eval_input_fn = read_dataset('eval.csv', mode=tf.estimator.ModeKeys.EVAL)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        steps=None,
        start_delay_secs=60,  # start evaluating after N seconds
        throttle_secs=interval_secs,  # evaluate every N seconds
        exporters=tf.estimator.LatestExporter('exporter', serving_input_fn))

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Finally, train!
In [ ]:
# Run the model
# Delete the output directory left by any earlier run; errors (such as the
# directory not existing yet) are ignored.
shutil.rmtree('babyweight_trained', ignore_errors = True) # start fresh each time
# Clear TensorFlow's cache of summary file writers - presumably so that no
# cached writer still refers to the directory that was just deleted (confirm).
tf.compat.v1.summary.FileWriterCache.clear()
# Train, evaluate and export, using 'babyweight_trained' as the output directory.
train_and_evaluate('babyweight_trained')
The `export/exporter` directory inside `babyweight_trained` contains the final exported model.
Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License