In [ ]:
import datetime
import os
import shutil
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, DenseFeatures
from tensorflow.keras.callbacks import TensorBoard
print(tf.__version__)
%matplotlib inline
In [ ]:
PROJECT = 'cloud-training-demos' # REPLACE WITH YOUR PROJECT ID
BUCKET = 'cloud-training-demos' # REPLACE WITH YOUR BUCKET NAME
REGION = 'us-central1' # REPLACE WITH YOUR BUCKET REGION e.g. us-central1
In [ ]:
# For Bash Code
os.environ['PROJECT'] = PROJECT
os.environ['BUCKET'] = BUCKET
os.environ['REGION'] = REGION
In [ ]:
%%bash
gcloud config set project $PROJECT
gcloud config set compute/region $REGION
In [ ]:
!ls -l ../data/taxi-traffic*
In [ ]:
!head ../data/taxi-traffic*
In [ ]:
CSV_COLUMNS = [
'fare_amount',
'dayofweek',
'hourofday',
'pickup_longitude',
'pickup_latitude',
'dropoff_longitude',
'dropoff_latitude',
'traffic_last_5min'
]
LABEL_COLUMN = 'fare_amount'
DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]
def features_and_labels(row_data):
label = row_data.pop(LABEL_COLUMN)
features = row_data
return features, label
def create_dataset(pattern, batch_size=1, mode=tf.estimator.ModeKeys.EVAL):
dataset = tf.data.experimental.make_csv_dataset(
pattern, batch_size, CSV_COLUMNS, DEFAULTS)
dataset = dataset.map(features_and_labels)
if mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(buffer_size=1000).repeat()
# take advantage of multi-threading; 1=AUTOTUNE
dataset = dataset.prefetch(1)
return dataset
In [ ]:
INPUT_COLS = [
'dayofweek',
'hourofday',
'pickup_longitude',
'pickup_latitude',
'dropoff_longitude',
'dropoff_latitude',
'traffic_last_5min'
]
# Create input layer of feature columns
feature_columns = {
colname: tf.feature_column.numeric_column(colname)
for colname in INPUT_COLS
}
In [ ]:
# Build a keras DNN model using Sequential API
def build_model(dnn_hidden_units):
model = Sequential(DenseFeatures(feature_columns=feature_columns.values()))
for num_nodes in dnn_hidden_units:
model.add(Dense(units=num_nodes, activation="relu"))
model.add(Dense(units=1, activation="linear"))
# Create a custom evalution metric
def rmse(y_true, y_pred):
return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))
# Compile the keras model
model.compile(optimizer="adam", loss="mse", metrics=[rmse, "mse"])
return model
Next, we can call the build_model
to create the model. Here we'll have two hidden layers before our final output layer. And we'll train with the same parameters we used before.
In [ ]:
HIDDEN_UNITS = [32, 8]
model = build_model(dnn_hidden_units=HIDDEN_UNITS)
In [ ]:
BATCH_SIZE = 1000
NUM_TRAIN_EXAMPLES = 10000 * 6 # training dataset will repeat, wrap around
NUM_EVALS = 60 # how many times to evaluate
NUM_EVAL_EXAMPLES = 10000 # enough to get a reasonable sample
trainds = create_dataset(
pattern='../data/taxi-traffic-train*',
batch_size=BATCH_SIZE,
mode=tf.estimator.ModeKeys.TRAIN)
evalds = create_dataset(
pattern='../data/taxi-traffic-valid*',
batch_size=BATCH_SIZE,
mode=tf.estimator.ModeKeys.EVAL).take(NUM_EVAL_EXAMPLES//1000)
In [ ]:
%%time
steps_per_epoch = NUM_TRAIN_EXAMPLES // (BATCH_SIZE * NUM_EVALS)
LOGDIR = "./taxi_trained"
history = model.fit(x=trainds,
steps_per_epoch=steps_per_epoch,
epochs=NUM_EVALS,
validation_data=evalds,
callbacks=[TensorBoard(LOGDIR)])
In [ ]:
RMSE_COLS = ['rmse', 'val_rmse']
pd.DataFrame(history.history)[RMSE_COLS].plot()
In [ ]:
model.predict(x={"dayofweek": tf.convert_to_tensor([6]),
"hourofday": tf.convert_to_tensor([17]),
"pickup_longitude": tf.convert_to_tensor([-73.982683]),
"pickup_latitude": tf.convert_to_tensor([40.742104]),
"dropoff_longitude": tf.convert_to_tensor([-73.983766]),
"dropoff_latitude": tf.convert_to_tensor([40.755174]),
"traffic_last_5min": tf.convert_to_tensor([114])},
steps=1)
In [ ]:
OUTPUT_DIR = "./export/savedmodel"
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
EXPORT_PATH = os.path.join(OUTPUT_DIR,
datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
tf.saved_model.save(model, EXPORT_PATH) # with default serving function
os.environ['EXPORT_PATH'] = EXPORT_PATH
In [ ]:
%%bash
PROJECT=${PROJECT}
BUCKET=${BUCKET}
REGION=${REGION}
MODEL_NAME=taxifare
VERSION_NAME=traffic
if [[ $(gcloud ai-platform models list --format='value(name)' | grep $MODEL_NAME) ]]; then
echo "$MODEL_NAME already exists"
else
# create model
echo "Creating $MODEL_NAME"
gcloud ai-platform models create --regions=$REGION $MODEL_NAME
fi
if [[ $(gcloud ai-platform versions list --model $MODEL_NAME --format='value(name)' | grep $VERSION_NAME) ]]; then
echo "Deleting already existing $MODEL_NAME:$VERSION_NAME ... "
gcloud ai-platform versions delete --model=$MODEL_NAME $VERSION_NAME
echo "Please run this cell again if you don't see a Creating message ... "
sleep 2
fi
# create model
echo "Creating $MODEL_NAME:$VERSION_NAME"
gcloud ai-platform versions create --model=$MODEL_NAME $VERSION_NAME --async \
--framework=tensorflow --python-version=3.5 --runtime-version=1.14 \
--origin=${EXPORT_PATH} --staging-bucket=gs://$BUCKET
Copyright 2019 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License