In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
import pandas as pd
import numpy as np
import pkg_resources
import matplotlib.pyplot as plt
import seaborn as sns
import time
import scipy.stats as stats
from sklearn import metrics
%matplotlib inline
# autoreload makes it easier to interactively work on code in imported libraries
%load_ext autoreload
%autoreload 2
# Set pandas display options so we can read more of the comment text.
pd.set_option('max_colwidth', 300)
import model_bias_analysis
In [2]:
# Print python version
import sys
print(sys.version)
In [3]:
# Read madlibs dataset, convert Label to Booleans
madlibs_df = pd.read_csv('eval_datasets/bias_madlibs_77k_scored_prod_models.csv')
madlibs_df.loc[madlibs_df.Label == 'BAD', 'Label'] = True
madlibs_df.loc[madlibs_df.Label == 'NOT_BAD', 'Label'] = False
# Needed for AUC metrics to work properly
madlibs_df['Label'] = madlibs_df['Label'].astype(bool)
In [4]:
# Add boolean identity columns, e.g. "hug american" should have True in the "american" column.
madlibs_identity_terms = model_bias_analysis.read_identity_terms('bias_madlibs_data/adjectives_people.txt')
model_bias_analysis.add_subgroup_columns_from_text(madlibs_df, 'Text', madlibs_identity_terms)
madlibs_df.head()
Out[4]:
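For context, add_subgroup_columns_from_text is assumed here to add one boolean column per identity term, set to True when that term appears as a word in the comment text. A minimal sketch of that idea (not the library's actual implementation; tag_identity_terms and toy_df are hypothetical names used only for illustration):

import re

def tag_identity_terms(df, text_column, terms):
    # For each term, add a boolean column that is True when the term appears
    # as a whole word in the text (case-insensitive).
    for term in terms:
        pattern = re.compile(r'\b%s\b' % re.escape(term), re.IGNORECASE)
        df[term] = df[text_column].apply(lambda text: bool(pattern.search(str(text))))

toy_df = pd.DataFrame({'Text': ['hug american', 'greet everyone']})
tag_identity_terms(toy_df, 'Text', ['american'])
# toy_df['american'] -> [True, False]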
In [5]:
def convert_to_bool_column(df, src_column, dst_column):
    # Threshold the source value at 0.5 to get a boolean.
    df.loc[df[src_column] >= 0.5, dst_column] = True
    df.loc[df[src_column] < 0.5, dst_column] = False
    # Convert NaNs to False.
    df.loc[df[src_column].isnull(), dst_column] = False
    # Needed for AUC metrics to work properly.
    df[dst_column] = df[dst_column].astype(bool)
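A toy illustration of convert_to_bool_column (scores_df is a hypothetical example frame):

scores_df = pd.DataFrame({'toxicity': [0.9, 0.2, None]})
convert_to_bool_column(scores_df, 'toxicity', 'Label')
# scores_df['Label'] -> [True, False, False]; missing scores become False.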
In [6]:
# Read Civil Comments dataset, set Label column to be a Boolean toxicity value.
civil_comments_df = pd.read_csv('eval_datasets/civil_comments_scored_prod_models.csv')
convert_to_bool_column(civil_comments_df, 'toxicity', 'Label')
In [7]:
# Convert the identity columns to booleans.
civil_comments_identity_terms = [
    'male', 'female', 'transgender', 'other_gender', 'heterosexual',
    'homosexual_gay_or_lesbian', 'bisexual', 'other_sexual_orientation', 'christian',
    'jewish', 'muslim', 'hindu', 'buddhist', 'atheist', 'other_religion', 'black',
    'white', 'asian', 'latino', 'other_race_or_ethnicity',
    'physical_disability', 'intellectual_or_learning_disability',
    'psychiatric_or_mental_illness', 'other_disability']
for identity in civil_comments_identity_terms:
    convert_to_bool_column(civil_comments_df, identity, identity)
civil_comments_df.head()
Out[7]:
In [8]:
# Create another view of the data that is limited to the comments < 100 characters.
civil_comments_short_df = civil_comments_df.loc[civil_comments_df['Text'].str.len() < 100]
In [9]:
# Calculate bias metrics on each dataset
madlibs_bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(
    madlibs_df,
    madlibs_identity_terms,
    ['Rock:TOXICITY', 'RockV6_2:TOXICITY'],
    'Label')
civil_comments_bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(
    civil_comments_df,
    civil_comments_identity_terms,
    ['Rock:TOXICITY', 'RockV6_2:TOXICITY'],
    'Label')
In [10]:
civil_comments_short_bias_metrics = model_bias_analysis.compute_bias_metrics_for_models(
    civil_comments_short_df,
    civil_comments_identity_terms,
    ['Rock:TOXICITY', 'RockV6_2:TOXICITY'],
    'Label')
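compute_bias_metrics_for_models reports, for each identity and model, the subgroup AUC along with the BPSN (background positive, subgroup negative) and BNSP (background negative, subgroup positive) AUCs. The sketch below shows roughly how such per-subgroup AUCs can be computed with sklearn; it is illustrative only, not the library's actual code, and compute_subgroup_aucs is a hypothetical name:

def compute_subgroup_aucs(df, subgroup, label_col, score_col):
    # Split into comments that mention the identity and the background (the rest).
    in_subgroup = df[df[subgroup]]
    background = df[~df[subgroup]]

    # Subgroup AUC: separation of toxic vs. non-toxic within the subgroup.
    subgroup_auc = metrics.roc_auc_score(in_subgroup[label_col], in_subgroup[score_col])

    # BPSN AUC: background positives mixed with subgroup negatives.
    bpsn = pd.concat([background[background[label_col]], in_subgroup[~in_subgroup[label_col]]])
    bpsn_auc = metrics.roc_auc_score(bpsn[label_col], bpsn[score_col])

    # BNSP AUC: background negatives mixed with subgroup positives.
    bnsp = pd.concat([background[~background[label_col]], in_subgroup[in_subgroup[label_col]]])
    bnsp_auc = metrics.roc_auc_score(bnsp[label_col], bnsp[score_col])

    return subgroup_auc, bpsn_auc, bnsp_auc

# Example: compute_subgroup_aucs(civil_comments_df, 'muslim', 'Label', 'Rock:TOXICITY')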
In [11]:
# Sort bias metrics by weighted pinned AUC
def sort_by_weighted_pinned_auc(bias_metrics):
    bias_metrics['weighted_pinned_auc'] = (
        bias_metrics['Rock:TOXICITY_bnsp_auc'] +
        bias_metrics['Rock:TOXICITY_bpsn_auc'] +
        bias_metrics['Rock:TOXICITY_subgroup_auc']) / 3
    bias_metrics = bias_metrics.sort_values('weighted_pinned_auc', ascending=True)
    return bias_metrics.drop('weighted_pinned_auc', axis=1)
madlibs_bias_metrics = sort_by_weighted_pinned_auc(madlibs_bias_metrics)
#civil_comments_bias_metrics = sort_by_weighted_pinned_auc(civil_comments_bias_metrics)
#civil_comments_short_bias_metrics = sort_by_weighted_pinned_auc(civil_comments_short_bias_metrics)
In [12]:
civil_comments_short_bias_metrics
Out[12]:
In [13]:
# Define functions for printing bias metric heatmaps
N_AEG_COLORS = 12
# Build a diverging palette for the AEG heatmaps: magenta shades for the
# negative half and green shades for the positive half.
aeg_negative_colors = sns.cubehelix_palette(
    n_colors=N_AEG_COLORS,
    start=0.6,  # start at magenta
    rot=0,      # no rotation
    gamma=3,
    hue=1,
    light=1,
    reverse=True,
    as_cmap=False)[int(N_AEG_COLORS / 2):]
aeg_positive_colors = sns.cubehelix_palette(
    n_colors=N_AEG_COLORS,
    start=2,    # greenish
    rot=0,      # no rotation
    gamma=3,
    hue=1,
    light=1,
    reverse=False,
    as_cmap=False)[:int(N_AEG_COLORS / 2)]
aeg_colors = aeg_negative_colors + aeg_positive_colors
def limit_subgroups(bias_metrics, subgroups):
    return bias_metrics.loc[bias_metrics.subgroup.isin(subgroups)]

def print_auc_heatmap(bias_metrics, subgroups, model, out=None, color_palette=None):
    bias_metrics_for_subgroups = limit_subgroups(bias_metrics, subgroups)
    if not color_palette:
        color_palette = sns.cubehelix_palette(
            n_colors=100,
            start=0.6,  # start at magenta
            rot=0,      # no rotation
            gamma=3,
            hue=1,
            light=1.2,
            reverse=True,
            as_cmap=False)
    model_bias_analysis.plot_auc_heatmap(bias_metrics_for_subgroups, [model], color_palette, out=out)

def print_aeg_heatmap(bias_metrics, subgroups, model, out=None, color_palette=None):
    bias_metrics_for_subgroups = limit_subgroups(bias_metrics, subgroups)
    if not color_palette:
        color_palette = aeg_colors
    model_bias_analysis.plot_aeg_heatmap(bias_metrics_for_subgroups, [model], color_palette, out=out)

def get_identities_over_n(df, n, all_identities):
    # Return the identities that appear in more than n records of df.
    results = []
    for identity in all_identities:
        num_records = len(df.query(identity + ' == True'))
        if num_records > n:  # use the n parameter instead of a hard-coded 100
            results.append(identity)
    return results
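A quick toy check of get_identities_over_n (toy_df is hypothetical):

toy_df = pd.DataFrame({'muslim': [True] * 150 + [False] * 50,
                       'atheist': [True] * 20 + [False] * 180})
get_identities_over_n(toy_df, 100, ['muslim', 'atheist'])  # -> ['muslim']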
In [14]:
# Print Civil Comments heatmaps. Filter identities to only those with more
# than 100 short comments referring to them.
CIVIL_COMMENTS_HEATMAP_IDENTITIES = get_identities_over_n(civil_comments_short_df, 100, civil_comments_identity_terms)
print_auc_heatmap(civil_comments_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/civil_aucs_1.png')
print_auc_heatmap(civil_comments_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/civil_aucs_6.png')
print_aeg_heatmap(civil_comments_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/civil_aegs_1.png')
print_aeg_heatmap(civil_comments_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/civil_aegs_6.png')
# Print Civil Comments Short Comments heatmaps
print_auc_heatmap(civil_comments_short_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/civil_short_aucs_1.png')
print_auc_heatmap(civil_comments_short_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/civil_short_aucs_6.png')
print_aeg_heatmap(civil_comments_short_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/civil_short_aegs_1.png')
print_aeg_heatmap(civil_comments_short_bias_metrics, CIVIL_COMMENTS_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/civil_short_aegs_6.png')
# Print Madlibs heatmaps, using the same number of identities as the Civil Comments heatmaps.
num_identities = 20
MADLIB_HEATMAP_IDENTITIES = madlibs_identity_terms[:num_identities]
print_auc_heatmap(madlibs_bias_metrics, MADLIB_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/madlibs_aucs_1.png')
print_auc_heatmap(madlibs_bias_metrics, MADLIB_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/madlibs_aucs_6.png')
print_aeg_heatmap(madlibs_bias_metrics, MADLIB_HEATMAP_IDENTITIES, 'Rock:TOXICITY', '/tmp/madlibs_aegs_1.png')
print_aeg_heatmap(madlibs_bias_metrics, MADLIB_HEATMAP_IDENTITIES, 'RockV6_2:TOXICITY', '/tmp/madlibs_aegs_6.png')
In [15]:
def get_percent_toxic(df):
    toxic_comments = df.query('Label == True')
    return len(toxic_comments) / len(df)

def print_count_and_percent_toxic(df, identity):
    # Select all comments where the identity column is True.
    identity_comments = df.query(identity + ' == True')
    # Of those, select the comments whose "toxicity" column is also True.
    toxic_identity_comments = identity_comments.query('toxicity == True')
    # Alternatively, you could write a single query using & (and), e.g.:
    # toxic_identity_comments = df.query(identity + ' == True & toxicity == True')
    # Print the results.
    num_comments = len(identity_comments)
    percent_toxic = get_percent_toxic(identity_comments)
    print('%d comments refer to the %s identity, %.2f%% are toxic' % (
        num_comments,
        identity,
        # Multiply percent_toxic by 100 for easier reading.
        100 * percent_toxic))

for identity in civil_comments_identity_terms:
    print_count_and_percent_toxic(civil_comments_df, identity)

# Print the results for all comments.
num_comments = len(civil_comments_df)
percent_toxic = get_percent_toxic(civil_comments_df)
print('%d comments for all identities, %.2f%% are toxic' % (
    num_comments,
    # Multiply percent_toxic by 100 for easier reading.
    100 * percent_toxic))
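If a sortable table is more convenient than printed lines, the same per-identity statistics can be collected into a DataFrame (an optional sketch reusing the helpers above; identity_stats is a hypothetical name):

rows = []
for identity in civil_comments_identity_terms:
    identity_comments = civil_comments_df.query(identity + ' == True')
    rows.append({'identity': identity,
                 'num_comments': len(identity_comments),
                 'percent_toxic': 100 * get_percent_toxic(identity_comments)})
identity_stats = pd.DataFrame(rows)
identity_stats.sort_values('percent_toxic', ascending=False)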
In [16]:
def plot_background_histogram(data, model, title, label_col='Label', out=None):
    # Plot overlapping score histograms: green for non-toxic comments,
    # magenta for toxic comments.
    fig = plt.figure()
    toxic_scores = data.query(label_col + ' == True')[model]
    toxic_scores = toxic_scores.rename(title)
    non_toxic_scores = data.query(label_col + ' == False')[model]
    non_toxic_scores = non_toxic_scores.rename(title)
    sns.distplot(non_toxic_scores, color='#2b6f39', bins=30)
    sns.distplot(toxic_scores, color='#d38fc5', bins=30)
    if out:
        fig.savefig(out, format='png', bbox_inches='tight')
    return fig

def plot_histogram(data, identity, model, title, label_col='Label', out=None):
    # Same as above, but restricted to comments that mention the given identity.
    fig = plt.figure()
    toxic_scores = data.query(identity + ' == True & ' + label_col + ' == True')[model]
    toxic_scores = toxic_scores.rename(title)
    non_toxic_scores = data.query(identity + ' == True & ' + label_col + ' == False')[model]
    non_toxic_scores = non_toxic_scores.rename(title)
    sns.distplot(non_toxic_scores, color='#2b6f39', axlabel=title)
    sns.distplot(toxic_scores, color='#d38fc5', axlabel=title)
    if out:
        fig.savefig(out, format='png', bbox_inches='tight')
    return fig
In [17]:
plot_background_histogram(civil_comments_df, 'Rock:TOXICITY',
                          'Background - All Comments - TOXICITY@1', out='/tmp/hist_background_all_1.png')
plot_background_histogram(civil_comments_df, 'RockV6_2:TOXICITY',
                          'Background - All Comments - TOXICITY@6', out='/tmp/hist_background_all_6.png')
plot_background_histogram(civil_comments_short_df, 'Rock:TOXICITY',
                          'Background - Short Comments - TOXICITY@1', out='/tmp/hist_background_short_1.png')
plot_background_histogram(civil_comments_short_df, 'RockV6_2:TOXICITY',
                          'Background - Short Comments - TOXICITY@6', out='/tmp/hist_background_short_6.png')
Out[17]:
In [18]:
plot_histogram(civil_comments_df, 'homosexual_gay_or_lesbian', 'Rock:TOXICITY',
               'homosexual_gay_or_lesbian - All Comments - TOXICITY@1', out='/tmp/hist_hgl_all_1.png')
plot_histogram(civil_comments_df, 'homosexual_gay_or_lesbian', 'RockV6_2:TOXICITY',
               'homosexual_gay_or_lesbian - All Comments - TOXICITY@6', out='/tmp/hist_hgl_all_6.png')
plot_histogram(civil_comments_short_df, 'homosexual_gay_or_lesbian', 'Rock:TOXICITY',
               'homosexual_gay_or_lesbian - Short Comments - TOXICITY@1', out='/tmp/hist_hgl_short_1.png')
plot_histogram(civil_comments_short_df, 'homosexual_gay_or_lesbian', 'RockV6_2:TOXICITY',
               'homosexual_gay_or_lesbian - Short Comments - TOXICITY@6', out='/tmp/hist_hgl_short_6.png')
Out[18]:
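The per-identity histograms above can be generated for every identity shown in the heatmaps with a short loop (a sketch; the output file names are illustrative):

for identity in CIVIL_COMMENTS_HEATMAP_IDENTITIES:
    for model, suffix in [('Rock:TOXICITY', '1'), ('RockV6_2:TOXICITY', '6')]:
        plot_histogram(civil_comments_df, identity, model,
                       '%s - All Comments - TOXICITY@%s' % (identity, suffix),
                       out='/tmp/hist_%s_all_%s.png' % (identity, suffix))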