In [0]:
#@title Copyright 2020 Google LLC. Double-click here for license information.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Like several of the previous Colabs, this Colab uses the California Housing Dataset.
In [0]:
#@title Run on TensorFlow 2.x
%tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
In [0]:
#@title Import relevant modules
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import seaborn as sns
# The following lines adjust the granularity of reporting.
pd.options.display.max_rows = 10
pd.options.display.float_format = "{:.1f}".format
print("Imported modules.")
In [0]:
train_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv")
train_df = train_df.reindex(np.random.permutation(train_df.index)) # shuffle the examples
test_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_test.csv")
In [0]:
#@title Convert raw values to their Z-scores
# Calculate the Z-scores of each column in the training set:
train_df_mean = train_df.mean()
train_df_std = train_df.std()
train_df_norm = (train_df - train_df_mean)/train_df_std
# Calculate the Z-scores of each column in the test set.
test_df_mean = test_df.mean()
test_df_std = test_df.std()
test_df_norm = (test_df - test_df_mean)/test_df_std
print("Normalized the values.")
The following code cell creates a feature layer containing three features:

* latitude X longitude (a feature cross)
* median_income
* population

This code cell specifies the features that you'll ultimately train the model on and how each of those features will be represented. The transformations (collected in my_feature_layer) don't actually get applied until you pass a DataFrame to it, which will happen when we train the model.
In [0]:
# Create an empty list that will eventually hold all created feature columns.
feature_columns = []
# We scaled all the columns, including latitude and longitude, into their
# Z scores. So, instead of picking a resolution in degrees, we're going
# to use resolution_in_Zs. A resolution_in_Zs of 1 corresponds to
# a full standard deviation.
resolution_in_Zs = 0.3 # 3/10 of a standard deviation.
# Create a bucket feature column for latitude.
latitude_as_a_numeric_column = tf.feature_column.numeric_column("latitude")
latitude_boundaries = list(np.arange(int(min(train_df_norm['latitude'])),
                                     int(max(train_df_norm['latitude'])),
                                     resolution_in_Zs))
latitude = tf.feature_column.bucketized_column(latitude_as_a_numeric_column, latitude_boundaries)
# Create a bucket feature column for longitude.
longitude_as_a_numeric_column = tf.feature_column.numeric_column("longitude")
longitude_boundaries = list(np.arange(int(min(train_df_norm['longitude'])),
                                      int(max(train_df_norm['longitude'])),
                                      resolution_in_Zs))
longitude = tf.feature_column.bucketized_column(longitude_as_a_numeric_column,
                                                longitude_boundaries)
# Create a feature cross of latitude and longitude.
latitude_x_longitude = tf.feature_column.crossed_column([latitude, longitude], hash_bucket_size=100)
crossed_feature = tf.feature_column.indicator_column(latitude_x_longitude)
feature_columns.append(crossed_feature)
# Represent median_income as a floating-point value.
median_income = tf.feature_column.numeric_column("median_income")
feature_columns.append(median_income)
# Represent population as a floating-point value.
population = tf.feature_column.numeric_column("population")
feature_columns.append(population)
# Convert the list of feature columns into a layer that will later be fed into
# the model.
my_feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
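As an optional sanity check (a minimal sketch, not part of the original exercise), you can apply my_feature_layer to a few rows of the normalized training set; the layer pulls out only the columns referenced by the feature columns above and returns their concatenated numeric representation:

# Optional sketch: apply the feature layer to a handful of examples.
sample = {name: np.array(value)[:5] for name, value in train_df_norm.items()}
print(my_feature_layer(sample).shape)  # 5 rows by the total number of transformed feature values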
Before creating a deep neural net, find a baseline loss by running a simple linear regression model that uses the feature layer you just created.
In [0]:
#@title Define the plotting function.
def plot_the_loss_curve(epochs, mse):
  """Plot a curve of loss vs. epoch."""

  plt.figure()
  plt.xlabel("Epoch")
  plt.ylabel("Mean Squared Error")

  plt.plot(epochs, mse, label="Loss")
  plt.legend()
  plt.ylim([mse.min()*0.95, mse.max()*1.03])
  plt.show()

print("Defined the plot_the_loss_curve function.")
In [0]:
#@title Define functions to create and train a linear regression model
def create_model(my_learning_rate, feature_layer):
  """Create and compile a simple linear regression model."""
  # Most simple tf.keras models are sequential.
  model = tf.keras.models.Sequential()

  # Add the layer containing the feature columns to the model.
  model.add(feature_layer)

  # Add one linear layer to the model to yield a simple linear regressor.
  model.add(tf.keras.layers.Dense(units=1, input_shape=(1,)))

  # Construct the layers into a model that TensorFlow can execute.
  model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate),
                loss="mean_squared_error",
                metrics=[tf.keras.metrics.MeanSquaredError()])

  return model


def train_model(model, dataset, epochs, batch_size, label_name):
  """Feed a dataset into the model in order to train it."""

  # Split the dataset into features and label.
  features = {name:np.array(value) for name, value in dataset.items()}
  label = np.array(features.pop(label_name))
  history = model.fit(x=features, y=label, batch_size=batch_size,
                      epochs=epochs, shuffle=True)

  # Get details that will be useful for plotting the loss curve.
  epochs = history.epoch
  hist = pd.DataFrame(history.history)
  mse = hist["mean_squared_error"]

  return epochs, mse

print("Defined the create_model and train_model functions.")
Run the following code cell to invoke the functions defined in the preceding two code cells. (Ignore the warning messages.)
Note: Because we've scaled all the input data, including the label, the resulting loss values will be much smaller than in previous models.
Note: Depending on the version of TensorFlow, running this cell might generate WARNING messages. Please ignore these warnings.
In [0]:
# The following variables are the hyperparameters.
learning_rate = 0.01
epochs = 15
batch_size = 1000
label_name = "median_house_value"
# Establish the model's topography.
my_model = create_model(learning_rate, my_feature_layer)
# Train the model on the normalized training set.
epochs, mse = train_model(my_model, train_df_norm, epochs, batch_size, label_name)
plot_the_loss_curve(epochs, mse)
test_features = {name:np.array(value) for name, value in test_df_norm.items()}
test_label = np.array(test_features.pop(label_name)) # isolate the label
print("\n Evaluate the linear regression model against the test set:")
my_model.evaluate(x = test_features, y = test_label, batch_size=batch_size)
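Because the label was also converted to a Z-score, the loss reported above is measured in squared standard deviations of median_house_value. As an optional sketch (not part of the original exercise), you can relate it back to the label's original units by undoing that scaling:

# Optional sketch: express the test error in the label's original units.
test_mse = my_model.evaluate(x=test_features, y=test_label,
                             batch_size=batch_size, verbose=0)[0]
approx_rmse = np.sqrt(test_mse) * test_df_std["median_house_value"]
print("Approximate RMSE in original units: {:.0f}".format(approx_rmse))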
The create_model function defines the topography of the deep neural net, specifying the following:

* The number of layers in the deep neural net.
* The number of nodes in each layer.

The create_model function also defines the activation function of each layer.
In [0]:
def create_model(my_learning_rate, my_feature_layer):
  """Create and compile a deep neural net model."""
  # Most simple tf.keras models are sequential.
  model = tf.keras.models.Sequential()

  # Add the layer containing the feature columns to the model.
  model.add(my_feature_layer)

  # Describe the topography of the model by calling the tf.keras.layers.Dense
  # method once for each layer. We've specified the following arguments:
  #   * units specifies the number of nodes in this layer.
  #   * activation specifies the activation function (Rectified Linear Unit).
  #   * name is just a string that can be useful when debugging.

  # Define the first hidden layer with 20 nodes.
  model.add(tf.keras.layers.Dense(units=20,
                                  activation='relu',
                                  name='Hidden1'))

  # Define the second hidden layer with 12 nodes.
  model.add(tf.keras.layers.Dense(units=12,
                                  activation='relu',
                                  name='Hidden2'))

  # Define the output layer.
  model.add(tf.keras.layers.Dense(units=1,
                                  name='Output'))

  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
                loss="mean_squared_error",
                metrics=[tf.keras.metrics.MeanSquaredError()])

  return model
The train_model function trains the model from the input features and labels. The tf.keras.Model.fit method performs the actual training. The x parameter of the fit method is very flexible, enabling you to pass feature data in a variety of ways. The following implementation passes a Python dictionary in which:

* The keys are the names of each feature (for example, longitude, latitude, and so on).
* The value of each key is a NumPy array containing the values of that feature.

Note: Although you are passing every feature to model.fit, most of those values will be ignored. Only the features accessed by my_feature_layer will actually be used to train the model.
In [0]:
def train_model(model, dataset, epochs, label_name,
                batch_size=None):
  """Train the model by feeding it data."""

  # Split the dataset into features and label.
  features = {name:np.array(value) for name, value in dataset.items()}
  label = np.array(features.pop(label_name))
  history = model.fit(x=features, y=label, batch_size=batch_size,
                      epochs=epochs, shuffle=True)

  # The list of epochs is stored separately from the rest of history.
  epochs = history.epoch

  # To track the progression of training, gather a snapshot
  # of the model's mean squared error at each epoch.
  hist = pd.DataFrame(history.history)
  mse = hist["mean_squared_error"]

  return epochs, mse
In [0]:
# The following variables are the hyperparameters.
learning_rate = 0.01
epochs = 20
batch_size = 1000
# Specify the label
label_name = "median_house_value"
# Establish the model's topography.
my_model = create_model(learning_rate, my_feature_layer)
# Train the model on the normalized training set. We're passing the entire
# normalized training set, but the model will only use the features
# defined by the feature_layer.
epochs, mse = train_model(my_model, train_df_norm, epochs,
                          label_name, batch_size)
plot_the_loss_curve(epochs, mse)
# After building a model against the training set, test that model
# against the test set.
test_features = {name:np.array(value) for name, value in test_df_norm.items()}
test_label = np.array(test_features.pop(label_name)) # isolate the label
print("\n Evaluate the new model against the test set:")
my_model.evaluate(x = test_features, y = test_label, batch_size=batch_size)
In [0]:
#@title Double-click to view a possible answer
# Assuming that the linear model converged and
# the deep neural net model also converged, please
# compare the test set loss for each.
# In our experiments, the loss of the deep neural
# network model was consistently lower than
# that of the linear regression model, which
# suggests that the deep neural network model
# will make better predictions than the
# linear regression model.
Experiment with the number of layers of the deep neural network and the number of nodes in each layer. Aim to achieve both of the following goals:

* Lower the loss against the test set.
* Minimize the overall number of nodes in the deep neural net.

The two goals may be in conflict.
In [0]:
#@title Double-click to view a possible answer
# Many answers are possible. We noticed the
# following trends:
# * Two layers outperformed one layer, but
#   three layers did not perform significantly
#   better than two layers. In other words,
#   two layers seemed best.
# * Setting the topography as follows produced
# reasonably good results with relatively few
# nodes:
# * 10 nodes in the first layer.
# * 6 nodes in the second layer.
# As the number of nodes in each layer dropped
# below the preceding values, test loss increased.
# However, depending on your application, hardware
# constraints, and the relative pain inflicted
# by a less accurate model, a smaller network
# (for example, 6 nodes in the first layer and
# 4 nodes in the second layer) might be
# acceptable.
Notice that the model's loss against the test set is much higher than the loss against the training set. In other words, the deep neural network is overfitting to the data in the training set. To reduce overfitting, regularize the model. The course has suggested several different ways to regularize a model, including:

* L1 regularization
* L2 regularization
* Dropout regularization
Your task is to experiment with one or more regularization mechanisms to bring the test loss closer to the training loss (while still keeping test loss relatively low).
Note: When you add a regularization function to a model, you might need to tweak other hyperparameters.
To use L1 or L2 regularization on a hidden layer, specify the kernel_regularizer argument to tf.keras.layers.Dense. Assign one of the following methods to this argument:

* tf.keras.regularizers.l1 for L1 regularization
* tf.keras.regularizers.l2 for L2 regularization

Each of the preceding methods takes an l parameter, which adjusts the regularization rate. Assign a decimal value between 0 and 1.0 to l; the higher the decimal, the greater the regularization. For example, the following applies L2 regularization at a strength of 0.01:

model.add(tf.keras.layers.Dense(units=20,
                                activation='relu',
                                kernel_regularizer=tf.keras.regularizers.l2(l=0.01),
                                name='Hidden1'))
You implement dropout regularization as a separate layer in the topography. For example, the following code demonstrates how to add a dropout regularization layer between the first hidden layer and the second hidden layer:
model.add(tf.keras.layers.Dense( *define first hidden layer*))
model.add(tf.keras.layers.Dropout(rate=0.25))
model.add(tf.keras.layers.Dense( *define second hidden layer*))
The rate parameter to tf.keras.layers.Dropout specifies the fraction of nodes that the model should drop out during training.
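For concreteness, here is a minimal sketch (illustrative only, not the official solution) showing where such a layer could sit in the topography defined earlier in this Colab; the layer sizes and names reuse the earlier values, and the rate of 0.25 is just an example:

# A minimal sketch: a Dropout layer between the two hidden layers.
model = tf.keras.models.Sequential()
model.add(my_feature_layer)
model.add(tf.keras.layers.Dense(units=20, activation='relu', name='Hidden1'))
# During training, randomly zero out 25% of Hidden1's outputs on each step.
model.add(tf.keras.layers.Dropout(rate=0.25))
model.add(tf.keras.layers.Dense(units=12, activation='relu', name='Hidden2'))
model.add(tf.keras.layers.Dense(units=1, name='Output'))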
In [0]:
#@title Double-click for a possible solution
# The following "solution" uses L2 regularization to bring training loss
# and test loss closer to each other. Many, many other solutions are possible.
def create_model(my_learning_rate, my_feature_layer):
  """Create and compile a deep neural net model with L2 regularization."""
  # Discard any pre-existing version of the model.
  model = None

  # Most simple tf.keras models are sequential.
  model = tf.keras.models.Sequential()

  # Add the layer containing the feature columns to the model.
  model.add(my_feature_layer)

  # Describe the topography of the model.

  # Implement L2 regularization in the first hidden layer.
  model.add(tf.keras.layers.Dense(units=20,
                                  activation='relu',
                                  kernel_regularizer=tf.keras.regularizers.l2(0.04),
                                  name='Hidden1'))

  # Implement L2 regularization in the second hidden layer.
  model.add(tf.keras.layers.Dense(units=12,
                                  activation='relu',
                                  kernel_regularizer=tf.keras.regularizers.l2(0.04),
                                  name='Hidden2'))

  # Define the output layer.
  model.add(tf.keras.layers.Dense(units=1,
                                  name='Output'))

  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
                loss="mean_squared_error",
                metrics=[tf.keras.metrics.MeanSquaredError()])

  return model
# Call the new create_model function and the other (unchanged) functions.
# The following variables are the hyperparameters.
learning_rate = 0.007
epochs = 140
batch_size = 1000
label_name = "median_house_value"
# Establish the model's topography.
my_model = create_model(learning_rate, my_feature_layer)
# Train the model on the normalized training set.
epochs, mse = train_model(my_model, train_df_norm, epochs,
                          label_name, batch_size)
plot_the_loss_curve(epochs, mse)
test_features = {name:np.array(value) for name, value in test_df_norm.items()}
test_label = np.array(test_features.pop(label_name)) # isolate the label
print("\n Evaluate the new model against the test set:")
my_model.evaluate(x = test_features, y = test_label, batch_size=batch_size)