In [0]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import Sequential
We'll use a colorectal histology dataset that comes pre-installed with TF Datasets. First we'll extract the data as a TF Dataset. Then we'll load a VGG19 model without its top classification layers. Finally, we'll add layers specific to our classification problem so that the final model outputs a softmax classification corresponding to one of the 8 classes in our dataset.
In [0]:
# These images will be (150,150,3)
(train, validation, test), info = tfds.load(
    'colorectal_histology',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)
In [53]:
# Some info on the original dataset
info
Out[53]:
In [0]:
# Utility for what each label corresponds to (the dataset has 8 classes)
label_map = ['tumor', 'stroma', 'complex', 'lympho', 'debris', 'mucosa', 'adipose', 'empty']
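Rather than hardcoding this list, you can also read the class names straight from the dataset metadata; a quick sketch using the TFDS ClassLabel feature:
In [0]:
# The label feature exposes the canonical class names in index order
print(info.features['label'].names)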
In [56]:
# Preview 2 examples from our dataset
get_label_name = info.features['label'].int2str
for image, label in train.take(2):
    plt.figure()
    plt.imshow(np.array(image))
    plt.title(get_label_name(label))
The labels in the original dataset are single scalar values (ranging from 0 to 7). We need to convert these to one-hot vectors to match the 8-unit softmax output our model will produce.
In [0]:
def label_format(image, label):
    return (image, tf.one_hot(label, depth=8))

train = train.map(label_format)
validation = validation.map(label_format)
test = test.map(label_format)
In [0]:
# Create batches
train_batch = train.shuffle(500).batch(32)
val_batch = validation.batch(32)
test_batch = test.batch(32)
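As an optional input-pipeline tweak, you can prefetch batches so preprocessing overlaps with training. A minimal sketch, assuming a recent TF 2.x (older releases spell it tf.data.experimental.AUTOTUNE):
In [0]:
# Prepare the next batch in the background while the current one trains
train_batch = train_batch.prefetch(tf.data.AUTOTUNE)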
In [71]:
for image_batch, label_batch in train_batch.take(1):
    pass
image_batch.shape
Out[71]:
In [0]:
# Load the VGG19 model without the top classification layers; we'll freeze it next
vgg_model = tf.keras.applications.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3)
)
In [0]:
vgg_model.trainable = False
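One caveat worth flagging: VGG19's ImageNet weights were trained on images preprocessed with its companion preprocess_input function (RGB-to-BGR conversion plus per-channel mean subtraction). The cells below feed raw pixel values, as in the original flow; here's a minimal sketch of applying that preprocessing, should you want it:
In [0]:
# Sketch only, not applied below: map VGG19's expected preprocessing over a dataset
def vgg_preprocess(image, label):
    return tf.keras.applications.vgg19.preprocess_input(tf.cast(image, tf.float32)), label

# train_batch = train_batch.map(vgg_preprocess)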
In [74]:
vgg_model.summary()
In [0]:
feature_batch = vgg_model(image_batch)
In [76]:
global_avg_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_avg = global_avg_layer(feature_batch)
print(feature_batch_avg.shape)
In [77]:
prediction_layer = tf.keras.layers.Dense(8, activation='softmax')
prediction_batch = prediction_layer(feature_batch_avg)
print(prediction_batch.shape)
In [0]:
# Build our new model, implementing transfer learning
colorectal_model = keras.Sequential([
    vgg_model,
    global_avg_layer,
    prediction_layer
])
In [79]:
colorectal_model.summary()
In [0]:
colorectal_model.compile(
    optimizer='adam',
    # The prediction layer already applies softmax, so the loss receives probabilities
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)
In [81]:
colorectal_model.fit(
    train_batch,
    validation_data=val_batch,
    epochs=15
)
Out[81]:
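Since we held out the last 10% of the data as a test split, it's worth checking how the model does on it; a minimal sketch:
In [0]:
# Evaluate the transfer-learned model on the unseen test batches
test_loss, test_accuracy = colorectal_model.evaluate(test_batch)
print('Test accuracy:', test_accuracy)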
We'll use the IMDB movie review dataset from TF Datasets. This contains 50k movie reviews with polarized sentiment. The goal is to train a model to predict whether a review is positive or negative. We'll use TF Hub to build the first layer of our model.
In [0]:
# Get the data and split into train, validate, test
reviews_train, reviews_validate, reviews_test = tfds.load(
    'imdb_reviews',
    split=('train[:80%]', 'train[80%:90%]', 'test'),
    as_supervised=True
)
Since this is already formatted as a tf.data.Dataset, we'll preview it by iterating over the first five examples. You should see the review text and its corresponding label: 0 is negative, 1 is positive. When we build our model, we'll use a sigmoid output since this is a binary classification task.
In [38]:
for text, label in reviews_train.take(5):
    print('Review text', text.numpy())
    print('Review sentiment', label.numpy(), '\n')
Here we'll import a TF Hub module for text classification. Because we're using TF Hub, we can feed the data to our model directly as strings. We don't need to worry about preprocessing since TF Hub will handle converting the text to embeddings for us.
In [0]:
hub_layer = hub.KerasLayer(
    "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1",
    input_shape=[], dtype=tf.string, trainable=True
)
To see what the TF Hub layer is doing, let's see the embedding for an example sentence.
In [57]:
test_embedding = hub_layer(["I'm excited to try out transfer learning with TF Hub"])
print(test_embedding)
In [58]:
model = keras.Sequential([
    hub_layer,
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model.summary()
In [0]:
model.compile(
    optimizer='adam',
    # The final layer applies a sigmoid, so the loss receives probabilities
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy']
)
In [60]:
# Note: you can train for more epochs to get higher accuracy
model.fit(
    reviews_train.shuffle(10000).batch(512),
    validation_data=reviews_validate.batch(512),
    epochs=15
)
Out[60]:
In [61]:
# Evaluate the model
results = model.evaluate(reviews_test.batch(512))
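evaluate returns one value per compiled metric, in order; a short sketch to label them:
In [0]:
# Pair each metric name with its value from evaluate()
for name, value in zip(model.metrics_names, results):
    print(f'{name}: {value:.3f}')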
Here we'll generate predictions on examples from our test dataset and print out the first 10 predictions along with their corresponding review text.
The output of our model is a value between 0 and 1. Values close to 0 indicate a confident negative review, and values close to 1 indicate a confident positive review.
In [0]:
prediction = model.predict(reviews_test.batch(512))
In [55]:
for i, val in enumerate(reviews_test.take(10)):
    print(val[0])
    print(prediction[i][0])
    print()
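To turn these probabilities into hard labels, you can threshold at 0.5; a minimal sketch:
In [0]:
# Probabilities above 0.5 map to positive (1), the rest to negative (0)
predicted_labels = (prediction[:10, 0] > 0.5).astype(int)
print(predicted_labels)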
Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.