Copyright 2019 Google LLC. SPDX-License-Identifier: Apache-2.0
This notebook shows use of the What-If Tool to compare two text models that determine sentence toxicity, one of which has had some debiasing applied during training.
This notebook loads two pretrained toxicity models from ConversationAI and compares them on the Wikipedia comments dataset.
This notebook also shows how the What-If Tool can be used on models that aren't standard TensorFlow Estimators. In this case, the models are Keras models that do not use tf.Example protos as their input format. Such models can be analyzed in the What-If Tool by supplying a custom prediction function to WitWidget.
It also shows use of a user-provided custom distance function (for counterfactual analysis and datapoint-similarity visualizations). The tf.Hub Universal Sentence Encoder is used to compute the similarity of the input text comments.
This notebook is only compatible with TensorFlow 2.0.0 and later versions.
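As a rough, hypothetical sketch of the contract involved (the real implementations appear further down in this notebook), a custom prediction function for WitWidget receives a list of tf.Example protos and returns one model output per example, and a custom distance function receives one tf.Example plus a list of examples to compare against (and a third configuration argument) and returns one distance per comparison example:
In [0]:
#@title Illustrative sketch of the custom-function signatures WIT expects (toy logic only)
# These toy functions only illustrate the input/output shapes; the notebook's real
# implementations (using the Keras models and the Universal Sentence Encoder) are defined below.
def toy_predict_fn(examples_to_infer):
  # One placeholder score per input tf.Example.
  return [[0.5] for _ in examples_to_infer]

def toy_distance_fn(input_example, examples_to_compare, params):
  # One placeholder distance per comparison example.
  return [0.0 for _ in examples_to_compare]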
In [0]:
#@title Install the What-If Tool widget if running in colab {display-mode: "form"}
# If running in Colab, pip install the packages; otherwise no install is needed.
try:
  import google.colab
  !pip install --upgrade "tensorflow>=2.0.0" witwidget
except Exception:
  pass
In [0]:
#@title Download the pretrained keras model files
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_wiki_tox_v3_model.h5 -o ./cnn_wiki_tox_v3_model.h5
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_wiki_tox_v3_hparams.h5 -o ./cnn_wiki_tox_v3_hparams.h5
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_wiki_tox_v3_tokenizer.pkl -o ./cnn_wiki_tox_v3_tokenizer.pkl
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_debias_tox_v3_model.h5 -o ./cnn_debias_tox_v3_model.h5
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_debias_tox_v3_hparams.h5 -o ./cnn_debias_tox_v3_hparams.h5
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/cnn_debias_tox_v3_tokenizer.pkl -o ./cnn_debias_tox_v3_tokenizer.pkl
!curl -L https://storage.googleapis.com/what-if-tool-resources/computefest2019/wiki_test.csv -o ./wiki_test.csv
In [0]:
#@title Load the keras models
import sys
import tensorflow as tf
from six.moves import cPickle as pkl
def pkl_load(f):
  return pkl.load(f) if sys.version_info < (3, 0) else pkl.load(
      f, encoding='latin1')

model1 = tf.keras.models.load_model('cnn_wiki_tox_v3_model.h5')
with open('cnn_wiki_tox_v3_tokenizer.pkl', 'rb') as f:
  tokenizer1 = pkl_load(f)
  tokenizer1.oov_token = None # quick fix for version issues

model2 = tf.keras.models.load_model('cnn_debias_tox_v3_model.h5')
with open('cnn_debias_tox_v3_tokenizer.pkl', 'rb') as f:
  tokenizer2 = pkl_load(f)
  tokenizer2.oov_token = None # quick fix for version issues
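If you want to confirm that the models loaded correctly, you can optionally inspect their architectures with the standard Keras API:
In [0]:
#@title Optional: inspect the loaded model architectures
model1.summary()
model2.summary()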
In [0]:
#@title Define custom prediction functions so that WIT infers using keras models
import tensorflow as tf
# Set up model helper functions:
PADDING_LEN = 250

# Convert a list of tf.Examples to a list of comment strings.
def examples_to_strings(examples):
  texts = [ex.features.feature['comment'].bytes_list.value[0] for ex in examples]
  if sys.version_info >= (3, 0):
    texts = [t.decode('utf-8') for t in texts]
  return texts

# Get the raw string out of each tf.Example and prepare it for keras model input.
def examples_to_model_in(examples, tokenizer):
  texts = examples_to_strings(examples)
  # Tokenize the strings into fixed-length sequences of integers based on the
  # tokenizer and model padding.
  text_sequences = tokenizer.texts_to_sequences(texts)
  model_ins = tf.keras.preprocessing.sequence.pad_sequences(
      text_sequences, maxlen=PADDING_LEN)
  return model_ins

# WIT predict functions:
def custom_predict_1(examples_to_infer):
  model_ins = examples_to_model_in(examples_to_infer, tokenizer1)
  preds = model1.predict(model_ins)
  return preds

def custom_predict_2(examples_to_infer):
  model_ins = examples_to_model_in(examples_to_infer, tokenizer2)
  preds = model2.predict(model_ins)
  return preds
In [0]:
#@title Define helper functions for dataset conversion from csv to tf.Examples
import numpy as np
import tensorflow as tf
# Converts a dataframe into a list of tf.Example protos.
def df_to_examples(df, columns=None):
  examples = []
  if columns is None:
    columns = df.columns.values.tolist()
  for index, row in df.iterrows():
    example = tf.train.Example()
    for col in columns:
      if df[col].dtype is np.dtype(np.int64):
        example.features.feature[col].int64_list.value.append(int(row[col]))
      elif df[col].dtype is np.dtype(np.float64):
        example.features.feature[col].float_list.value.append(row[col])
      elif row[col] == row[col]:  # Skip NaN values.
        example.features.feature[col].bytes_list.value.append(row[col].encode('utf-8'))
    examples.append(example)
  return examples

# Converts a dataframe column into a column of 0's and 1's based on the provided test.
# Used to force label columns to be numeric for binary classification using a TF estimator.
def make_label_column_numeric(df, label_column, test):
  df[label_column] = np.where(test(df[label_column]), 1, 0)
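As a small illustration of these helpers (a sketch using a tiny hypothetical dataframe, not the real dataset), labels are first forced to 0/1 and then each row becomes a tf.Example proto:
In [0]:
#@title Optional: illustrate the conversion helpers on a tiny hypothetical dataframe
import pandas as pd

sample_df = pd.DataFrame({'is_toxic': [True, False],
                          'comment': ['example comment one', 'example comment two']})
# Convert the boolean label column to 0/1 integers.
make_label_column_numeric(sample_df, 'is_toxic', lambda val: val)
# Convert each row into a tf.Example proto and print the first one.
sample_examples = df_to_examples(sample_df)
print(sample_examples[0])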
In [0]:
#@title Read the dataset from CSV and process it for model {display-mode: "form"}
import pandas as pd
# Set the path to the CSV containing the dataset to load.
csv_path = 'wiki_test.csv'

# Set the column names for the columns in the CSV. If the CSV's first line is a header line
# containing the column names, then set this to None.
csv_columns = None

# Read the dataset from the provided CSV.
df = pd.read_csv(csv_path, names=csv_columns, skipinitialspace=True)
df = df[['is_toxic', 'comment']]

# Remove non-ASCII characters
comments = df['comment'].values
proc_comments = []
comment_lengths = []
for c in comments:
  try:
    if sys.version_info >= (3, 0):
      c = bytes(c, 'utf-8')
      c = c.decode('unicode_escape')
    if sys.version_info < (3, 0):
      c = c.encode('ascii', 'ignore')
    proc_comment = c.strip()
  except Exception:
    proc_comment = ''
  proc_comments.append(proc_comment)
  comment_lengths.append(len(proc_comment.split()))

df = df.assign(comment=proc_comments)
df['comment length'] = comment_lengths

label_column = 'is_toxic'
make_label_column_numeric(df, label_column, lambda val: val)
examples = df_to_examples(df)
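Before launching the tool, you can optionally sanity-check the two prediction functions on a few of the converted examples (a quick sketch, assuming each model outputs a single toxicity score per comment):
In [0]:
#@title Optional: sanity-check the custom prediction functions on a few examples
sample = examples[:5]
print('original model:', [float(p[0]) for p in custom_predict_1(sample)])
print('debiased model:', [float(p[0]) for p in custom_predict_2(sample)])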
In [0]:
#@title Define a custom distance function for comparing datapoints (uses tf.Hub) {display-mode: "form"}
import tensorflow as tf
import tensorflow_hub as hub
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
# For this use-case, we set the distance between datapoints to be the cosine distance
# between unit-normalized embeddings of each datapoint from the tf.Hub
# Universal Sentence Encoder.
def universal_sentence_encoder_distance(input_example, examples_to_compare, _):
  # Extract comment strings
  input_sentence = examples_to_strings([input_example])[0]
  sentences = examples_to_strings(examples_to_compare)

  # Normalize all embeddings for the cosine distance operation
  input_emb = tf.squeeze(tf.nn.l2_normalize(embed([input_sentence]), axis=1))
  sentences_emb = tf.nn.l2_normalize(embed(sentences), axis=1)

  # Tile the input example for easy comparison to all examples
  multiply = tf.constant([len(examples_to_compare)])
  input_matrix = tf.reshape(tf.tile(input_emb, multiply),
                            [multiply[0], tf.shape(input_emb)[0]])

  # Compute cosine distance from the input example to all examples.
  cosine_distance = tf.keras.losses.CosineSimilarity(
      axis=1, reduction=tf.losses.Reduction.NONE)
  distances = cosine_distance(sentences_emb, input_matrix)
  results = tf.squeeze(distances)
  return results.numpy().tolist()
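You can also try the distance function directly before handing it to WIT (a quick sketch; the tool calls it with a third configuration argument, which this function ignores, so None is passed here):
In [0]:
#@title Optional: try the custom distance function on a few examples
# Distance from the first comment to the next three comments
# (lower values indicate more similar comments).
print(universal_sentence_encoder_distance(examples[0], examples[1:4], None))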
In [0]:
#@title Invoke What-If Tool for the data and two models (Note that this step may take a while due to prediction speed of the toxicity model) {display-mode: "form"}
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

num_datapoints = 1000  #@param {type: "number"}
tool_height_in_px = 720  #@param {type: "number"}

# Set up the tool with the test examples and the two trained classifiers.
config_builder = WitConfigBuilder(examples[:num_datapoints]).set_custom_predict_fn(
    custom_predict_1).set_compare_custom_predict_fn(custom_predict_2).set_custom_distance_fn(
    universal_sentence_encoder_distance)
wv = WitWidget(config_builder, height=tool_height_in_px)
In [0]:
#@title Add a feature column for each identity term to indicate if it exists in the comment
!wget https://raw.githubusercontent.com/conversationai/unintended-ml-bias-analysis/master/unintended_ml_bias/bias_madlibs_data/adjectives_people.txt
import re
import six

with open('adjectives_people.txt', 'r') as f:
  segments = f.read().strip().split('\n')
print(segments)

# Tag each comment with the identity terms it contains.
comments = df['comment'].values
seg_anns = {}
selected_segments = segments
for s in selected_segments:
  is_seg = []
  for c in comments:
    if re.search(s, c):
      is_seg.append(1)
    else:
      is_seg.append(0)
  seg_anns[s] = is_seg

for seg_key, seg_ann in six.iteritems(seg_anns):
  df[seg_key] = pd.Series(seg_ann, index=df.index)

examples = df_to_examples(df)
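With the identity-term columns added to each example, the What-If Tool can be invoked again (a sketch reusing the exact configuration from above) so the new columns are available for slicing and analysis in the tool:
In [0]:
#@title Invoke What-If Tool again with the identity-term feature columns
# Same configuration as the earlier invocation, now over examples that include
# a 0/1 feature column per identity term.
config_builder = WitConfigBuilder(examples[:num_datapoints]).set_custom_predict_fn(
    custom_predict_1).set_compare_custom_predict_fn(custom_predict_2).set_custom_distance_fn(
    universal_sentence_encoder_distance)
wv = WitWidget(config_builder, height=tool_height_in_px)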