This model uses transfer learning with TensorFlow Hub and Keras. It is based on https://www.tensorflow.org/tutorials/keras/text_classification_with_hub
It classifies movie reviews as positive or negative using the text of the review. The reviews come from an IMDB dataset that contains the text of 50,000 movie reviews from the Internet Movie Database. These are split into 25,000 reviews for training and 25,000 reviews for testing.
In [1]:
# Already installed if you are using Cloud AI Platform Notebooks
#!pip install -q tensorflow-hub
#!pip install -q tfds-nightly
In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
# Download the IMDB reviews dataset via TensorFlow Datasets.
# as_supervised=True yields (review_text, label) tuples instead of feature dicts.
train_data, test_data = tfds.load(
name="imdb_reviews",
split=('train', 'test'),
as_supervised=True)
In [3]:
split = 3 # 1/4 records is validation
# Carve a validation set out of train_data without materializing anything:
#  - window(3, shift=4) groups records 0-2, 4-6, 8-10, ... for training;
#  - skip(3).window(1, shift=4) picks records 3, 7, 11, ... for validation.
# Each window is a nested dataset of (text, label) component datasets; the
# flat_map lambda zips the components back into a flat supervised dataset.
dataset_train = train_data.window(split, split + 1).flat_map(lambda *ds: ds[0] if len(ds) == 1 else tf.data.Dataset.zip(ds))
dataset_validation = train_data.skip(split).window(1, split + 1).flat_map(lambda *ds: ds[0] if len(ds) == 1 else tf.data.Dataset.zip(ds))
In [4]:
# Pre-trained 20-dimensional text embedding from TF Hub; the "-with-oov"
# variant hashes out-of-vocabulary tokens into extra buckets.
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1"
# trainable=True fine-tunes the embedding weights along with the classifier head.
hub_layer = hub.KerasLayer(embedding, input_shape=[],
dtype=tf.string, trainable=True, name='full_text')
In [5]:
# Binary sentiment classifier: the pre-trained text-embedding layer followed
# by a small fully-connected head that emits one logit per review.
model = tf.keras.Sequential([
    hub_layer,
    tf.keras.layers.Dense(16, activation='relu', name='h1_dense'),
    tf.keras.layers.Dense(1, name='positive_review_logits'),
])
model.summary()
In [6]:
# from_logits=True because the final Dense layer has no sigmoid activation.
model.compile(optimizer='adam',
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
# shuffle(10000) uses a 10k-record buffer (the training split above holds
# 3/4 of the 25k train records); batches of 512 reviews, 10 epochs.
history = model.fit(dataset_train.shuffle(10000).batch(512),
epochs=10,
validation_data=dataset_validation.batch(512),
verbose=1)
In [7]:
# Evaluate loss and accuracy on the held-out 25k-review test split.
results = model.evaluate(test_data.batch(512), verbose=2)
for name, value in zip(model.metrics_names, results):
print("%s: %.3f" % (name, value))
In [28]:
# Sanity-check predictions on three hand-written reviews.
review1 = 'The film is based on a prize-winning novel.' # neutral
review2 = 'The film is fast moving and has several great action scenes.' # positive
review3 = 'The film was very boring. I walked out half-way.' # negative
# Output is raw logits (no sigmoid in the model); larger means more positive.
logits = model.predict(x=tf.constant([review1, review2, review3]))
print(logits)
In [36]:
## how big is the model in memory?
import sys
# From https://goshippo.com/blog/measure-real-size-any-python-object/
def get_size(obj, seen=None):
    """Recursively estimate the in-memory size of a Python object in bytes.

    Adapted from https://goshippo.com/blog/measure-real-size-any-python-object/

    Args:
        obj: any Python object.
        seen: set of id()s already visited; used internally to avoid
            double-counting shared objects and to terminate on
            self-referential structures. Callers should omit it.

    Returns:
        sys.getsizeof(obj) plus the recursive size of its dict keys/values,
        instance attributes, or iterable elements. Best-effort: objects that
        raise during traversal contribute only their shallow size.
    """
    size = sys.getsizeof(obj)
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    # Important: mark as seen *before* entering recursion to gracefully
    # handle self-referential objects.
    seen.add(obj_id)
    try:
        if isinstance(obj, dict):
            size += sum(get_size(v, seen) for v in obj.values())
            size += sum(get_size(k, seen) for k in obj.keys())
        elif hasattr(obj, '__dict__'):
            size += get_size(obj.__dict__, seen)
        elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
            # NOTE(review): iterating here would consume one-shot iterators;
            # assumed to be applied to containers only.
            size += sum(get_size(i, seen) for i in obj)
    except Exception:
        # Best-effort traversal: some objects raise on attribute access or
        # iteration. Catch Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass
    return size
# Approximate Python-side footprint of the in-memory Keras model object.
print('{} MB'.format(get_size(model)/(1000*1000)))
In [8]:
import os, datetime, shutil
# Remove any previous exports, then save a timestamped SavedModel under
# export/default/sentiment_YYYYMMDD_HHMMSS.
shutil.rmtree('export/default', ignore_errors=True)
export_path = os.path.join('export', 'default', 'sentiment_{}'.format(datetime.datetime.now().strftime("%Y%m%d_%H%M%S")))
model.save(export_path)
In [9]:
# List every file that was written as part of the SavedModel.
!find export/default
Note how much smaller the model graph itself is ... the assets and variables are constants and can be shared in a thread-safe way
In [42]:
# Compare on-disk sizes: the graph (saved_model.pb) vs. the vocabulary asset
# and the weight checkpoint files.
!ls -lh {export_path}/saved_model.pb
!ls -lh {export_path}/assets/tokens.txt
!ls -lh {export_path}/variables/variables.*
In [10]:
# Inspect the default serving signature (input/output tensor names and shapes).
!saved_model_cli show --dir {export_path} --tag_set serve --signature_def serving_default
In [22]:
## illustrates how we can load this model and do inference based on the signature above
restored = tf.keras.models.load_model(export_path)
review1 = 'The film is based on a prize-winning novel.' # neutral
review2 = 'The film is fast moving and has several great action scenes.' # positive
review3 = 'The film was very boring. I walked out half-way.' # negative
# Call through the SavedModel serving signature rather than the Keras API;
# input/output names come from the signature printed by saved_model_cli.
infer = restored.signatures['serving_default']
outputs = infer(full_text_input=tf.constant([review1, review2, review3])) # note input name
logit = outputs['positive_review_logits'] # note output name
print(logit)
In [23]:
# Convert logits to probabilities with the sigmoid: p = 1 / (1 + e^-logit).
print(1 / (1 + np.exp(-logit))) # probability
In [13]:
# Wrap the model in a serving function that returns sigmoid probabilities in
# addition to logits, and export it as the SavedModel's serving_default
# signature. The input_signature fixes the input to a 1-D string tensor.
@tf.function(input_signature=[tf.TensorSpec([None], dtype=tf.string)])
def add_prob(reviews):
logits = model(reviews, training=False) # the model is captured via closure
probs = tf.sigmoid(logits)
return {
'positive_review_logits' : logits,
'positive_review_probability' : probs
}
shutil.rmtree('export/probs', ignore_errors=True)
probs_export_path = os.path.join('export', 'probs', 'sentiment_{}'.format(datetime.datetime.now().strftime("%Y%m%d_%H%M%S")))
model.save(probs_export_path, signatures={'serving_default': add_prob})
In [14]:
# The new signature now exposes both the logits and the probabilities.
!saved_model_cli show --dir {probs_export_path} --tag_set serve --signature_def serving_default
In [15]:
# Reload the probability-emitting model and query it through its signature.
restored = tf.keras.models.load_model(probs_export_path)
infer = restored.signatures['serving_default']
outputs = infer(reviews=tf.constant([review1, review2, review3])) # note input name
probs = outputs['positive_review_probability'] # note output name
print(probs)
In [16]:
# Print the export directory (the first entry under export/probs).
!find export/probs | head -2 | tail -1
In [17]:
%%bash
# Deploy the exported SavedModel to Cloud AI Platform Prediction as model
# "imdb", version "v1". Idempotent: creates the model if it is missing and
# replaces any existing version of the same name.
MODEL_LOCATION=$(find export/probs | head -2 | tail -1)
MODEL_NAME=imdb
MODEL_VERSION=v1
TFVERSION=2.1
REGION=us-central1
BUCKET=ai-analytics-solutions-kfpdemo
# create the model if it doesn't already exist
modelname=$(gcloud ai-platform models list | grep -w "$MODEL_NAME")
echo $modelname
if [ -z "$modelname" ]; then
echo "Creating model $MODEL_NAME"
gcloud ai-platform models create ${MODEL_NAME} --regions $REGION
else
echo "Model $MODEL_NAME already exists"
fi
# delete the model version if it already exists
modelver=$(gcloud ai-platform versions list --model "$MODEL_NAME" | grep -w "$MODEL_VERSION")
echo $modelver
if [ "$modelver" ]; then
echo "Deleting version $MODEL_VERSION"
yes | gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
# give the deletion time to propagate before re-creating the version
sleep 10
fi
echo "Creating version $MODEL_VERSION from $MODEL_LOCATION"
gcloud ai-platform versions create ${MODEL_VERSION} \
--model ${MODEL_NAME} --origin ${MODEL_LOCATION} --staging-bucket gs://${BUCKET} \
--runtime-version $TFVERSION
In [18]:
%%writefile input.json
{"reviews": "The film is based on a prize-winning novel."}
{"reviews": "The film is fast moving and has several great action scenes."}
{"reviews": "The film was very boring. I walked out half-way."}
In [19]:
# Send the three JSON instances to the deployed model for online prediction.
!gcloud ai-platform predict --model imdb --json-instances input.json --version v1
In [20]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Call the deployed model through the Cloud ML Engine (AI Platform) REST API
# instead of the gcloud CLI.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials = credentials,
                      discoveryServiceUrl = "https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json")

request_data = {"instances":
  [
      {"reviews": "The film is based on a prize-winning novel."},
      {"reviews": "The film is fast moving and has several great action scenes."},
      {"reviews": "The film was very boring. I walked out half-way."}
  ]
}

# BUG FIX: the original passed two arguments ("ai-analytics-solutions", "v1")
# to a one-placeholder format string, so the second argument was silently
# ignored and the model name was hardcoded in the literal. The path below is
# identical; omitting the version makes the service use the default version.
parent = "projects/{}/models/{}".format("ai-analytics-solutions", "imdb") # use default version
response = api.projects().predict(body = request_data, name = parent).execute()
print("response = {0}".format(response))
In [21]:
# Probability that the first instance's review is positive.
print(response['predictions'][0]['positive_review_probability'][0])
In [4]:
def stateless_fn(x):
    """Pure function: the output depends only on the input (y = 3x + 15)."""
    return x * 3 + 15
class Stateless:
    """Callable that behaves like a pure function.

    The attributes set in __init__ never change afterwards, so repeated
    calls with the same input always return the same output
    (weight * x + bias).
    """

    def __init__(self):
        self.weight = 3
        self.bias = 15

    def __call__(self, x):
        return x * self.weight + self.bias
class State:
    """Callable with hidden state.

    Every call increments an internal counter; odd-numbered calls return
    3x - 15 and even-numbered calls return 3x + 15, so identical inputs
    can yield different outputs.
    """

    def __init__(self):
        self.counter = 0

    def __call__(self, x):
        self.counter += 1
        return 3 * x + 15 if self.counter % 2 == 0 else 3 * x - 15
# Demonstration: the pure function and the stateless callable always return
# the same value for the same input, while the stateful callable alternates
# between two answers for the very same input.
a1 = Stateless()
a = State()
print(stateless_fn(3))
print(stateless_fn(3))
print(a1(3))
print(a1(3))
print(a(3))
print(a(3))
print(a(3))
print(a(3))
In [ ]:
%%bigquery
-- Train a 3-class logistic regression in BigQuery ML that predicts a baby's
-- health bucket from birth features; the label is derived from the 1-minute
-- Apgar score (10 -> Healthy, 8-9 -> Neutral, below 8 -> NeedsAttention).
CREATE OR REPLACE MODEL mlpatterns.neutral_3classes
OPTIONS(model_type='logistic_reg', input_label_cols=['health']) AS
SELECT
IF(apgar_1min = 10, 'Healthy', IF(apgar_1min >= 8, 'Neutral', 'NeedsAttention')) AS health,
plurality,
mother_age,
gestation_weeks,
ever_born
FROM `bigquery-public-data.samples.natality`
WHERE apgar_1min <= 10
This works, but running ML.PREDICT through BigQuery is too slow for low-latency online prediction
In [48]:
%%bigquery
-- Predict directly in BigQuery: classify a single example supplied inline
-- as a one-row subquery.
SELECT * FROM ML.PREDICT(MODEL mlpatterns.neutral_3classes,
(SELECT
2 AS plurality,
32 AS mother_age,
41 AS gestation_weeks,
1 AS ever_born
)
)
Out[48]:
Better is to export the model and then deploy that ...
In [ ]:
%%bash
# Export the BigQuery ML model as a TensorFlow SavedModel to Cloud Storage
# so it can be deployed for online prediction.
BUCKET=ai-analytics-solutions-kfpdemo
bq extract -m --destination_format=ML_TF_SAVED_MODEL mlpatterns.neutral_3classes gs://${BUCKET}/export/baby_health
In [49]:
%%bash
# Deploy the exported BigQuery ML SavedModel to Cloud AI Platform Prediction
# as model "babyhealth", version "v1". Idempotent: creates the model if it is
# missing and replaces any existing version of the same name.
TFVERSION=1.15
REGION=us-central1
BUCKET=ai-analytics-solutions-kfpdemo
MODEL_LOCATION=gs://${BUCKET}/export/baby_health
MODEL_NAME=babyhealth
MODEL_VERSION=v1
# create the model if it doesn't already exist
modelname=$(gcloud ai-platform models list | grep -w "$MODEL_NAME")
echo $modelname
if [ -z "$modelname" ]; then
echo "Creating model $MODEL_NAME"
gcloud ai-platform models create ${MODEL_NAME} --regions $REGION
else
echo "Model $MODEL_NAME already exists"
fi
# delete the model version if it already exists
modelver=$(gcloud ai-platform versions list --model "$MODEL_NAME" | grep -w "$MODEL_VERSION")
echo $modelver
if [ "$modelver" ]; then
echo "Deleting version $MODEL_VERSION"
yes | gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
# give the deletion time to propagate before re-creating the version
sleep 10
fi
echo "Creating version $MODEL_VERSION from $MODEL_LOCATION"
gcloud ai-platform versions create ${MODEL_VERSION} \
--model ${MODEL_NAME} --origin ${MODEL_LOCATION} --staging-bucket gs://${BUCKET} \
--runtime-version $TFVERSION
In [50]:
%%writefile input.json
{"plurality": 2, "mother_age": 32, "gestation_weeks": 41, "ever_born": 1}
In [51]:
# Get an online prediction for the single instance written to input.json.
!gcloud ai-platform predict --model babyhealth --json-instances input.json --version v1
Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License