Imports


In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from IPython.display import display
import json
import numpy as np
import pandas as pd
import os
import random
import re
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import tensorflow as tf

Read scored test data


In [2]:
# GCS locations of the scored test CSVs (standard, scrubbed, very scrubbed,
# and gender variants, going by the file names).
standard_data_path = 'gs://conversationai-models/biosbias/scored_data/test_standard_0409.csv'
scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_scrubbed_0409.csv'
very_scrubbed_data_path = 'gs://conversationai-models/biosbias/scored_data/test_very_scrubbed_0409.csv'
gender_data_path = 'gs://conversationai-models/biosbias/scored_data/test_data_gender.csv'


def _read_deduped_csv(path):
    """Reads a CSV through tf.gfile and drops rows with a repeated 'tokens' value.

    The file handle is opened in a `with` block so it is closed as soon as
    pandas has consumed it, instead of being left open for the lifetime of
    the kernel.

    Args:
        path: Path of the CSV file (a gs:// URL here).

    Returns:
        A DataFrame with one row per distinct 'tokens' value. drop_duplicates
        keeps the first occurrence and preserves the original row index
        labels, which the index-based joins further down rely on.
    """
    with tf.gfile.Open(path) as csv_file:
        return pd.read_csv(csv_file).drop_duplicates(subset=['tokens'])


perf_df = _read_deduped_csv(standard_data_path)
scrubbed_df = _read_deduped_csv(scrubbed_data_path)
very_scrubbed_df = _read_deduped_csv(very_scrubbed_data_path)
gender_df = _read_deduped_csv(gender_data_path)

In [3]:
# Sanity check: (rows, columns) of the standard and scrubbed frames, one per line.
print('{}\n{}'.format(perf_df.shape, scrubbed_df.shape))


(59824, 300)
(59820, 36)

In [4]:
# Index-aligned joins of the three scored frames. The rsuffix is only applied
# to column names that clash with ones already present (tokens/gender/label);
# the per-model score columns keep their original names.
df = (
    perf_df
    .join(scrubbed_df, rsuffix='_scrubbed')
    .join(very_scrubbed_df, rsuffix='_very_scrubbed')
)

In [5]:
# Preview the first five rows of the joined frame.
df.head(n=5)


Out[5]:
tokens gender label tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6 ... tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31 tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32
0 [u'he', u'is', u'currently', u'working', u'clo... M 25 0.000008 4.625991e-14 0.000089 0.000432 2.642943e-04 1.613340e-07 4.687537e-07 ... 0.001929 1.914383e-06 0.000097 0.000332 7.086468e-07 8.798547e-16 0.000041 0.000395 0.000054 8.315536e-08
1 [u'she', u'has', u'a', u'passion', u'for', u'w... F 26 0.000001 5.970340e-18 0.000004 0.000155 8.439872e-06 1.380430e-07 8.653511e-09 ... 0.013356 7.866625e-01 0.009269 0.024264 3.710595e-04 2.425320e-11 0.004488 0.002426 0.032467 1.274749e-04
2 [u'growing', u'up', u'under', u'the', u'influe... M 22 0.000205 1.023775e-15 0.008020 0.000054 1.159827e-06 2.420847e-06 4.043094e-06 ... 0.000135 8.046401e-04 0.002173 0.000697 3.003297e-05 8.979249e-14 0.001901 0.000097 0.001727 4.318769e-06
3 [u'he', u'earned', u'his', u'beng', u'degree',... M 25 0.000009 1.354895e-13 0.001508 0.000051 1.071294e-07 1.333064e-08 1.857020e-05 ... 0.009217 1.700057e-02 0.136035 0.009581 2.460610e-03 1.396903e-09 0.002276 0.009811 0.026841 1.840305e-04
4 [u'her', u'professional', u'and', u'educationa... F 25 0.001034 6.887217e-12 0.000701 0.021189 1.852501e-03 6.723991e-05 7.880444e-06 ... 0.000425 9.174340e-08 0.995151 0.001635 9.952086e-11 4.422046e-14 0.000974 0.000039 0.000482 1.483144e-07

5 rows × 372 columns


In [6]:
# (rows, columns) of the joined frame; the row count matches perf_df because
# DataFrame.join performs a left join by default.
df.shape


Out[6]:
(59824, 372)

In [7]:
# Discard every row that has a missing value in any column (presumably rows of
# perf_df with no counterpart in the scrubbed frames after the left joins),
# then report the remaining (rows, columns).
df = df.dropna(axis=0, how='any')
print(df.shape)


(59753, 372)

Preprocessing


In [9]:
def get_class_from_col_name(col_name):
    """Extracts the integer class id from the end of a score column name.

    Score columns are named '<model_name>_<class_id>', e.g.
    'tf_trainer_..._glove:v_20190328_103117_7' yields 7.

    Args:
        col_name: Column name ending in an underscore followed by digits.

    Returns:
        The trailing digits of col_name as an int.

    Raises:
        ValueError: If col_name does not end in '_<digits>'.
    """
    pattern = r'^.*_(\d+)$'
    match = re.search(pattern, col_name)
    if match is None:
        # re.search returns None when nothing matches; fail with a message
        # that names the offending column instead of an AttributeError on None.
        raise ValueError(
            'Column name {!r} does not end in "_<class id>"'.format(col_name))
    return int(match.group(1))

In [10]:
def find_best_class(df, model_name, class_names):
    """Adds a '<model_name>_class' column with each row's top-scoring class id.

    For every row, looks at the columns '<model_name>_<class_name>' and records
    the class id parsed from the name of the column holding the largest value.

    Args:
        df: DataFrame containing one score column per class for the model.
            It is modified in place.
        model_name: Prefix shared by the model's score columns.
        class_names: Iterable of class suffixes used to build the column names.

    Returns:
        None; the result is written to df['<model_name>_class'].
    """
    score_columns = []
    for class_name in class_names:
        score_columns.append('{}_{}'.format(model_name, class_name))
    # Name of the highest-valued score column for each row.
    top_columns = df[score_columns].idxmax(axis=1)
    prediction_column = '{}_class'.format(model_name)
    df[prediction_column] = top_columns.apply(get_class_from_col_name)

In [8]:
# Uncomment the line below to list every column name and look up the model names.
# df.columns.values


Out[8]:
array(['tokens', 'gender', 'label',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113_32',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131_32',
       'tokens_scrubbed', 'gender_scrubbed', 'label_scrubbed',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954_32',
       'tokens_very_scrubbed', 'gender_very_scrubbed',
       'label_very_scrubbed',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_0',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_1',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_2',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_3',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_4',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_5',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_6',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_7',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_8',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_9',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_10',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_11',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_12',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_13',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_14',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_15',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_16',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_17',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_18',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_19',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_20',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_21',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_22',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_23',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_24',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_25',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_26',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_27',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_28',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_29',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_30',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_31',
       'tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254_32'],
      dtype=object)

In [11]:
# Maps each scored model's column prefix (model name + version) to the short
# label used in the tables and plots of this notebook.
# The versions may have to change; they can be looked up in the experiment tracker.
MODEL_NAMES = dict([
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174837', 'debiased_tolga'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_174941', 'debiased_biosbias'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175003', 'strong_debiased_1'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175019', 'strong_debiased_2'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175034', 'strong_debiased_3'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175055', 'strong_debiased_4'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190328_103117', 'glove'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175113', 'strong_no_equalize'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175131', 'strong_no_projection'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190315_112954', 'scrubbed'),
    ('tf_trainer_tf_gru_attention_multiclass_biosbias_glove:v_20190410_175254', 'very_scrubbed'),
])

In [12]:
# Integer class ids 0..32, used as the suffix of every per-class column name.
CLASS_NAMES = range(0, 33)

In [13]:
# Run find_best_class once per model prefix.
# NOTE(review): presumably this adds the '<model>_class' column read by the
# accuracy cell below -- confirm against the find_best_class definition.
for _model in MODEL_NAMES.keys():
    find_best_class(df, _model, CLASS_NAMES)

In [14]:
# Sanity check: no (label, gender) combination present in the data may have
# fewer than 5 examples.
# NOTE(review): a gender that never occurs for a label yields no row in the
# value counts, so it would not be flagged here -- confirm that is acceptable.
_label_gender_counts = df.groupby('label').gender.value_counts().reset_index(name = 'count')
_too_few = _label_gender_counts['count'] < 5
bad_labels = _label_gender_counts.loc[_too_few, 'label'].values
assert len(bad_labels) == 0

Accuracy Calculation


In [15]:
# Overall accuracy per model: share of rows whose '<model>_class' value equals
# the true label.
accuracy_list = []
for _model in MODEL_NAMES:
    predicted_class = df['{}_class'.format(_model)]
    is_correct = (predicted_class == df['label'])
    _acc = sum(is_correct)/len(is_correct)
    accuracy_list.append(_acc)
    print ('Accuracy for model {}: {}'.format(MODEL_NAMES[_model], _acc))


Accuracy for model debiased_biosbias: 0.806972034877
Accuracy for model very_scrubbed: 0.355915184175
Accuracy for model debiased_tolga: 0.818921225713
Accuracy for model strong_debiased_1: 0.817984034274
Accuracy for model strong_no_projection: 0.806687530333
Accuracy for model strong_debiased_2: 0.81733134738
Accuracy for model strong_no_equalize: 0.815239402206
Accuracy for model glove: 0.817950563152
Accuracy for model strong_debiased_4: 0.814737335364
Accuracy for model strong_debiased_3: 0.817599116362
Accuracy for model scrubbed: 0.130503907754

Fairness Metrics


In [16]:
# One boolean indicator column per class: True where the row's label is that class.
for _class in CLASS_NAMES:
    _indicator_col = 'label_{}'.format(_class)
    df[_indicator_col] = df['label'].eq(_class)

In [17]:
# Number of examples per (label, gender) pair, as a flat table with the
# columns 'label', 'gender' and 'count'.
gender_counts = (
    df.groupby('label')
    .gender
    .value_counts()
    .reset_index(name = 'count')
)

In [18]:
def frac_female(df):
    """Compute the fraction of female examples for a single label.

    Args:
        df: rows of the gender-count table for one label, with the columns
            'label', 'gender' ("M" / "F") and 'count'.

    Returns:
        A dict with the label (taken from the first row) and 'frac_female',
        i.e. F / (M + F). A gender with no row counts as 0 examples instead
        of raising IndexError; if both counts are 0 the fraction is NaN.
    """
    def _count_for(gender):
        # First matching count, or 0 when this gender has no row for the label.
        matches = df.loc[df['gender'] == gender, 'count'].values
        return matches[0] if len(matches) else 0

    m_count = _count_for("M")
    f_count = _count_for("F")
    total = m_count + f_count
    frac = f_count/total if total else float('nan')
    return {'label': df['label'].values[0], 'frac_female': frac}

In [19]:
# One row per label with its female fraction. The groups are iterated
# explicitly (in sorted label order) rather than going through groupby.apply:
# frac_female returns a plain dict, and how apply wraps non-pandas return
# values (especially with as_index=False) is not stable across pandas versions.
frac_female_df = pd.DataFrame(
    [frac_female(_label_counts) for _label, _label_counts in gender_counts.groupby('label')]
)

In [20]:
def compute_tpr(df, _class, _model, threshold = 0.5):
    """Recall of `_model` on `_class`, via sklearn's recall_score.

    The ground truth is the 'label_<class>' column; a row counts as predicted
    positive when its '<model>_<class>' score is strictly above `threshold`.
    """
    is_actual = df['label_{}'.format(_class)]
    is_predicted = df['{}_{}'.format(_model,_class)] > threshold
    return metrics.recall_score(is_actual, is_predicted)
    
def compute_tpr_by_gender(df, _class, _model, threshold = 0.5):
    """Return compute_tpr evaluated separately on the "M" and "F" rows of `df`.

    The result is a dict keyed by 'M' and 'F'.
    """
    men = df.query('gender == "M"')
    women = df.query('gender == "F"')
    return {
        'M': compute_tpr(men, _class, _model, threshold),
        'F': compute_tpr(women, _class, _model, threshold),
    }

In [21]:
def compute_tpr_tnr(df, _class, _model, threshold = 0.5):
    """Compute the true-positive and true-negative rate of a model for one class.

    Args:
        df: frame holding the boolean 'label_<class>' column and the
            '<model>_<class>' score column.
        _class: class id used as the suffix of both column names.
        _model: model prefix of the score column.
        threshold: a row is predicted positive when its score is strictly
            greater than this value.

    Returns:
        (tpr, tnr). When `df` holds no positive examples tpr is 0.0, and when
        it holds no negative examples tnr is 1.0 (the defaults the previous
        crosstab-based implementation used for a class with no positives).
    """
    actual = df['label_{}'.format(_class)].astype(bool)
    predicted = df['{}_{}'.format(_model,_class)] > threshold

    # Count the four confusion-matrix cells directly. Indexing into a crosstab
    # breaks whenever all predictions (or all labels) fall on one side, because
    # the corresponding row/column is then missing from the table.
    tp = (actual & predicted).sum()
    fn = (actual & ~predicted).sum()
    tn = (~actual & ~predicted).sum()
    fp = (~actual & predicted).sum()

    tpr = tp/(tp+fn) if (tp+fn) > 0 else 0.0
    tnr = tn/(tn+fp) if (tn+fp) > 0 else 1.0
    return tpr, tnr

def compute_tr_by_gender(df, _class, _model, threshold = 0.5):
    """Return compute_tpr_tnr evaluated separately on the "M" and "F" rows of `df`.

    The result is a dict with the keys 'TPR_m', 'TPR_f', 'TNR_m' and 'TNR_f'.
    """
    male_rates = compute_tpr_tnr(df.query('gender == "M"'), _class, _model, threshold)
    female_rates = compute_tpr_tnr(df.query('gender == "F"'), _class, _model, threshold)
    return {
        'TPR_m': male_rates[0],
        'TPR_f': female_rates[0],
        'TNR_m': male_rates[1],
        'TNR_f': female_rates[1],
    }

In [22]:
# Cross-check: the sklearn-based TPR and the hand-computed TPR must agree
# exactly for every (class, model) pair.
for _class in CLASS_NAMES:
    for _model in MODEL_NAMES:
        tpr_1 = compute_tpr(df, _class, _model)
        tpr_2 = compute_tpr_tnr(df, _class, _model)[0]
        assert tpr_1 == tpr_2, '{} != {}'.format(tpr_1, tpr_2)

In [23]:
# Build one row per label holding, for every model, the overall TPR/TNR, the
# per-gender TPR/TNR and the female-minus-male gap of each rate.
# Rows are collected in a list and turned into a frame once: growing a frame
# with DataFrame.append inside the loop copies it on every iteration, and
# DataFrame.append no longer exists in pandas >= 2.0.
tpr_rows = []
for _class in frac_female_df.label:
    row = {'label': _class}
    for _model, _model_type in MODEL_NAMES.items():
        tpr, tnr = compute_tpr_tnr(df, _class, _model)
        row['{}_tpr'.format(_model_type)] = tpr
        row['{}_tnr'.format(_model_type)] = tnr
        gender_trs = compute_tr_by_gender(df, _class, _model)
        row['{}_tpr_F'.format(_model_type)] = gender_trs['TPR_f']
        row['{}_tpr_M'.format(_model_type)] = gender_trs['TPR_m']
        row['{}_tpr_gender_gap'.format(_model_type)] = gender_trs['TPR_f'] - gender_trs['TPR_m']
        row['{}_tnr_F'.format(_model_type)] = gender_trs['TNR_f']
        row['{}_tnr_M'.format(_model_type)] = gender_trs['TNR_m']
        row['{}_tnr_gender_gap'.format(_model_type)] = gender_trs['TNR_f'] - gender_trs['TNR_m']
    tpr_rows.append(row)
tpr_df = pd.DataFrame(tpr_rows)

In [24]:
# Attach each label's female fraction to its rate table (default inner join on 'label').
results_df = tpr_df.merge(frac_female_df, on = 'label')

In [25]:
# Profession names, indexed by integer class id (position in the list).
TITLE_LABELS = [
    'accountant', 'acupuncturist', 'architect', 'attorney', 'chiropractor',       # 0-4
    'comedian', 'composer', 'dentist', 'dietitian', 'dj',                         # 5-9
    'filmmaker', 'interior_designer', 'journalist', 'landscape_architect',        # 10-13
    'magician', 'massage_therapist', 'model', 'nurse', 'painter',                 # 14-18
    'paralegal', 'pastor', 'personal_trainer', 'photographer', 'physician',       # 19-23
    'poet', 'professor', 'psychologist', 'rapper', 'real_estate_broker',          # 24-28
    'software_engineer', 'surgeon', 'teacher', 'yoga_teacher',                    # 29-32
]

In [26]:
# Translate each numeric label into its profession name (labels are cast to
# int before being used as a list index).
results_df['label_profession'] = [TITLE_LABELS[int(_label)] for _label in results_df['label']]

In [27]:
# Correlation of each model's TPR gender gap with the female fraction of the class.
_corr_cols = ['frac_female'] + list(map('{}_tpr_gender_gap'.format, MODEL_NAMES.values()))
results_df[_corr_cols].corr()[['frac_female']]


Out[27]:
frac_female
frac_female 1.000000
debiased_biosbias_tpr_gender_gap 0.829982
very_scrubbed_tpr_gender_gap 0.458378
debiased_tolga_tpr_gender_gap 0.824882
strong_debiased_1_tpr_gender_gap 0.716922
strong_no_projection_tpr_gender_gap 0.709000
strong_debiased_2_tpr_gender_gap 0.596896
strong_no_equalize_tpr_gender_gap 0.772645
glove_tpr_gender_gap 0.794059
strong_debiased_4_tpr_gender_gap 0.550435
strong_debiased_3_tpr_gender_gap 0.707174
scrubbed_tpr_gender_gap -0.282919

In [28]:
# Names of the per-model gap columns in results_df, in MODEL_NAMES order.
tpr_gender_gap_cols = list(map('{}_tpr_gender_gap'.format, MODEL_NAMES.values()))
tnr_gender_gap_cols = list(map('{}_tnr_gender_gap'.format, MODEL_NAMES.values()))

In [29]:
# Keep only the profession, its female fraction and the per-model TPR/TNR gaps.
_gap_table_cols = ['label_profession', 'frac_female']
_gap_table_cols.extend(tpr_gender_gap_cols)
_gap_table_cols.extend(tnr_gender_gap_cols)
gender_gap_df = results_df[_gap_table_cols]

In [30]:
# Show the gap table ordered from the most to the least female-dominated class.
gender_gap_df.sort_values(by = 'frac_female', ascending = False)


Out[30]:
label_profession frac_female debiased_biosbias_tpr_gender_gap very_scrubbed_tpr_gender_gap debiased_tolga_tpr_gender_gap strong_debiased_1_tpr_gender_gap strong_no_projection_tpr_gender_gap strong_debiased_2_tpr_gender_gap strong_no_equalize_tpr_gender_gap glove_tpr_gender_gap ... very_scrubbed_tnr_gender_gap debiased_tolga_tnr_gender_gap strong_debiased_1_tnr_gender_gap strong_no_projection_tnr_gender_gap strong_debiased_2_tnr_gender_gap strong_no_equalize_tnr_gender_gap glove_tnr_gender_gap strong_debiased_4_tnr_gender_gap strong_debiased_3_tnr_gender_gap scrubbed_tnr_gender_gap
7 dietitian 0.920437 0.290927 0.173878 0.297707 0.199900 0.223862 0.187072 0.250980 0.232835 ... -0.000674 -0.002692 -0.001220 -0.001617 -0.001530 -0.002213 -0.002810 -0.001630 -0.001412 -0.000266
13 nurse 0.914502 0.082735 0.013742 0.085377 0.048740 0.033271 0.025981 0.057404 0.082411 ... -0.001686 -0.007627 -0.007427 -0.004478 -0.004807 -0.004455 -0.005866 -0.002840 -0.002707 -0.001573
15 paralegal 0.866109 0.375755 0.094656 0.317482 0.262077 0.256944 0.271437 0.314915 0.271437 ... 0.000247 -0.000103 0.000095 -0.000012 0.000075 -0.000219 -0.000164 -0.000060 -0.000236 0.000195
27 yoga_teacher 0.858696 0.276534 0.005518 0.143784 0.208049 0.116196 0.195067 0.161636 0.208374 ... 0.000535 -0.001455 -0.001289 -0.000758 -0.001393 -0.001211 -0.001211 -0.001081 -0.001399 0.000299
12 model 0.818988 0.480652 0.176120 0.544309 0.418456 0.460211 0.455824 0.532551 0.505093 ... -0.001022 -0.000566 0.000379 0.000429 -0.000039 -0.000513 -0.001008 0.000249 0.000181 0.001161
10 interior_designer 0.782609 0.182716 -0.013580 0.243210 0.081481 0.096296 0.041975 0.224691 0.270370 ... 0.000226 -0.000884 0.000032 -0.000023 0.000024 -0.000676 -0.000201 0.000216 -0.000013 0.000407
22 psychologist 0.620751 0.000799 0.008890 0.045876 0.043524 0.045169 0.020219 0.042056 0.017593 ... -0.000742 -0.005913 -0.004535 -0.002672 -0.002096 -0.004275 -0.002278 -0.003760 -0.002820 -0.001450
26 teacher 0.604382 0.111221 0.025352 0.129299 0.111760 0.113756 0.114246 0.119168 0.137121 ... -0.000813 -0.004694 -0.002497 -0.004570 -0.001141 -0.002609 -0.002664 -0.002461 -0.001785 0.000671
11 journalist 0.492152 0.019865 0.010182 0.057554 0.021920 0.001790 0.013070 0.042923 0.058686 ... 0.000762 -0.000167 0.001286 0.001514 0.001955 0.000651 0.000014 0.001617 0.001571 -0.002623
19 physician 0.491203 0.019845 0.036850 0.056989 0.035120 0.042554 0.040719 0.034896 0.024797 ... 0.001413 0.005790 0.006125 0.006385 0.006968 0.004761 0.007537 0.001844 0.004126 0.000307
20 poet 0.483051 -0.044163 0.009395 -0.007190 0.012207 0.006903 -0.006711 0.016393 0.001949 ... 0.000827 -0.000845 -0.001125 -0.000642 -0.000209 -0.000453 -0.000933 -0.000733 -0.000761 -0.000698
17 personal_trainer 0.468293 -0.080944 -0.011850 -0.068043 0.032397 -0.028670 -0.037557 -0.091361 -0.049694 ... -0.000783 -0.000399 -0.001005 -0.000138 -0.000473 -0.000470 -0.000456 -0.000816 -0.000737 0.000032
21 professor 0.452428 -0.018119 0.011301 -0.011141 -0.015243 -0.012384 0.002382 -0.004640 -0.002251 ... -0.001640 0.001259 0.001349 0.004071 -0.003034 -0.004298 -0.003673 -0.000717 -0.003564 -0.001995
14 painter 0.452361 0.003161 0.036012 0.017337 -0.035538 0.012959 0.006991 -0.001613 -0.002095 ... -0.000951 -0.000336 -0.000125 -0.000197 0.000173 0.000315 -0.000022 -0.000223 0.000050 0.000144
0 accountant 0.374554 -0.055930 -0.031311 -0.043805 -0.025312 0.000459 -0.015143 -0.044432 -0.060287 ... 0.001341 0.000090 0.000390 0.000694 0.000483 0.000683 0.000757 0.000344 0.000355 -0.000109
2 attorney 0.367104 -0.035824 -0.003903 -0.007270 0.007254 0.013928 0.004176 -0.010897 -0.004719 ... 0.003337 -0.000622 -0.001509 -0.001953 -0.001427 -0.001875 -0.002338 -0.002469 -0.001728 -0.000642
18 photographer 0.356927 -0.052775 -0.011488 -0.036094 -0.004054 -0.017355 -0.004763 -0.035910 -0.031379 ... 0.001568 0.000615 -0.000121 -0.000051 -0.000025 0.001542 0.001537 -0.000092 0.000170 -0.000297
6 dentist 0.345824 0.009651 -0.040738 0.003124 0.013102 0.014166 0.008104 0.017242 0.015563 ... 0.000945 0.000738 0.000532 0.000574 0.000563 0.000409 0.000801 0.000516 0.000359 0.000283
9 filmmaker 0.322148 -0.005893 -0.023485 -0.017356 0.038690 0.032797 0.018358 -0.019507 -0.001827 ... 0.000127 0.002068 0.001236 0.001535 0.001641 0.001847 0.002094 0.001204 0.000814 0.002261
3 chiropractor 0.298824 -0.025604 -0.004360 -0.073746 -0.023146 0.024071 -0.021350 -0.011547 -0.028457 ... -0.000152 0.000921 0.000345 0.000175 0.000617 0.000419 0.000127 0.000096 0.000309 0.000236
16 pastor 0.229282 -0.274172 -0.069785 -0.259533 -0.096731 -0.127909 -0.156583 -0.218206 -0.166127 ... -0.000073 0.001051 0.000741 0.000602 0.000453 0.001137 0.001293 0.000333 0.000333 0.000199
1 architect 0.225399 0.053551 0.003069 0.003208 0.106769 0.110808 0.073486 -0.005593 0.049996 ... -0.001115 0.004353 0.001076 0.001065 0.002815 0.005941 0.002935 0.002423 0.003652 -0.001770
4 comedian 0.219457 -0.225967 -0.081757 -0.156671 -0.065501 -0.076109 -0.087733 -0.118004 -0.124757 ... 0.000234 0.000593 -0.000030 0.000366 -0.000133 0.000633 0.000753 0.000327 -0.000055 0.000581
24 software_engineer 0.157746 -0.065456 0.023591 -0.056956 -0.042324 -0.060300 -0.021202 0.015468 -0.036829 ... 0.001209 0.005100 0.009260 0.007132 0.006266 0.003512 0.006474 0.004885 0.004344 -0.000074
25 surgeon 0.153592 -0.229816 -0.051839 -0.245461 -0.122859 -0.127233 -0.089205 -0.220015 -0.207968 ... 0.002435 0.005888 0.004638 0.002488 0.002320 0.004059 0.005013 0.003432 0.003316 0.000101
5 composer 0.153186 -0.068712 0.036272 -0.048370 -0.001737 -0.050061 -0.008452 -0.064452 -0.063849 ... 0.001154 0.001802 0.001463 0.001543 0.001389 0.001676 0.001567 0.001011 0.001245 0.002464
8 dj 0.145299 -0.103824 0.099118 -0.145000 0.027647 -0.083824 0.000882 -0.178824 -0.040588 ... 0.000244 0.001133 0.000257 0.000211 0.000721 0.000564 0.000206 0.000826 0.000299 -0.000025
23 rapper 0.085859 -0.138772 0.047449 -0.096198 0.017225 0.030224 0.175496 0.019175 -0.012350 ... 0.000577 0.000155 0.000631 0.000491 0.000268 0.000834 0.001200 0.000645 0.000180 -0.000048

28 rows × 24 columns


In [31]:
def compute_fraction_improved(df, baseline_model, improved_model):
    """Fraction of rows of `df` where the candidate's gap is no larger than the baseline's.

    Args:
        df: table holding one gap column per model.
        baseline_model: name of the baseline gap column.
        improved_model: name of the candidate gap column.

    Returns:
        The share of rows with |candidate gap| <= |baseline gap|. Ties count
        as improved, so comparing a column with itself yields 1.0.
    """
    baseline_gap = df[baseline_model].abs()
    candidate_gap = df[improved_model].abs()
    return (baseline_gap >= candidate_gap).mean()

In [32]:
# For every model, print the share of classes whose absolute TPR gap is no
# larger than that of the 'glove' model.
for _model in MODEL_NAMES.values():
    _candidate_col = '{}_tpr_gender_gap'.format(_model)
    print(_model)
    print(compute_fraction_improved(gender_gap_df, 'glove_tpr_gender_gap', _candidate_col))


debiased_biosbias
0.32142857142857145
very_scrubbed
0.7142857142857143
debiased_tolga
0.2857142857142857
strong_debiased_1
0.6428571428571429
strong_no_projection
0.6071428571428571
strong_debiased_2
0.7142857142857143
strong_no_equalize
0.39285714285714285
glove
1.0
strong_debiased_4
0.6071428571428571
strong_debiased_3
0.6071428571428571
scrubbed
0.8571428571428571

In [33]:
# Per-model gap column names: all TPR gaps first, then all TNR gaps.
tpr_cols = list(map('{}_tpr_gender_gap'.format, MODEL_NAMES.values()))
tnr_cols = list(map('{}_tnr_gender_gap'.format, MODEL_NAMES.values()))
gender_gap_cols = []
gender_gap_cols.extend(tpr_cols)
gender_gap_cols.extend(tnr_cols)

In [34]:
# Mean squared gender gap per column (averaged over classes).
(gender_gap_df[gender_gap_cols] ** 2).mean()


Out[34]:
debiased_biosbias_tpr_gender_gap       0.029446
very_scrubbed_tpr_gender_gap           0.003786
debiased_tolga_tpr_gender_gap          0.028584
strong_debiased_1_tpr_gender_gap       0.014313
strong_no_projection_tpr_gender_gap    0.015602
strong_debiased_2_tpr_gender_gap       0.016134
strong_no_equalize_tpr_gender_gap      0.025152
glove_tpr_gender_gap                   0.022636
strong_debiased_4_tpr_gender_gap       0.016461
strong_debiased_3_tpr_gender_gap       0.014632
scrubbed_tpr_gender_gap                0.000189
debiased_biosbias_tnr_gender_gap       0.000011
very_scrubbed_tnr_gender_gap           0.000001
debiased_tolga_tnr_gender_gap          0.000009
strong_debiased_1_tnr_gender_gap       0.000009
strong_no_projection_tnr_gender_gap    0.000006
strong_debiased_2_tnr_gender_gap       0.000006
strong_no_equalize_tnr_gender_gap      0.000006
glove_tnr_gender_gap                   0.000008
strong_debiased_4_tnr_gender_gap       0.000003
strong_debiased_3_tnr_gender_gap       0.000004
scrubbed_tnr_gender_gap                0.000001
dtype: float64

In [35]:
# Mean absolute gender gap per column (averaged over classes).
gender_gap_df[gender_gap_cols].abs().mean()


Out[35]:
debiased_biosbias_tpr_gender_gap       0.119049
very_scrubbed_tpr_gender_gap           0.041268
debiased_tolga_tpr_gender_gap          0.114932
strong_debiased_1_tpr_gender_gap       0.075670
strong_no_projection_tpr_gender_gap    0.079293
strong_debiased_2_tpr_gender_gap       0.075149
strong_no_equalize_tpr_gender_gap      0.102661
glove_tpr_gender_gap                   0.096764
strong_debiased_4_tpr_gender_gap       0.083171
strong_debiased_3_tpr_gender_gap       0.070882
scrubbed_tpr_gender_gap                0.007773
debiased_biosbias_tnr_gender_gap       0.002204
very_scrubbed_tnr_gender_gap           0.000958
debiased_tolga_tnr_gender_gap          0.002066
strong_debiased_1_tnr_gender_gap       0.001811
strong_no_projection_tnr_gender_gap    0.001657
strong_debiased_2_tnr_gender_gap       0.001537
strong_no_equalize_tnr_gender_gap      0.001866
glove_tnr_gender_gap                   0.001997
strong_debiased_4_tnr_gender_gap       0.001316
strong_debiased_3_tnr_gender_gap       0.001376
scrubbed_tnr_gender_gap                0.000747
dtype: float64

In [36]:
def plot_tpr_gap(df, _model):
    """Plot a model's TPR gender gap against the female fraction of each class.

    Draws a seaborn regression plot and annotates every point with its
    profession name.

    Args:
        df: table with the columns 'frac_female', 'label_profession' and
            '<_model>_tpr_gender_gap'.
        _model: short model label; selects the gap column and is used as the
            plot title.
    """
    x = 'frac_female'
    y = '{}_tpr_gender_gap'.format(_model)

    fig, ax = plt.subplots(figsize=(15, 6))
    # Draw on the axes created above instead of relying on the implicit
    # "current axes".
    sns.regplot(x = x, y = y, data = df, ax = ax)
    # The x values are fractions in [0, 1], not percentages.
    ax.set(xlabel = "Fraction Female", ylabel = "TPR Gender Gap", title = _model)

    # Annotate from the frame that was passed in (not the global results_df),
    # iterating by position so that any index -- filtered, sorted, or
    # non-integer -- works.
    for x_val, y_val, profession in zip(df[x], df[y], df['label_profession']):
        ax.text(x_val+0.01, y_val, profession, horizontalalignment='left', size='medium', color='black')
    plt.show()

In [37]:
# Plot the TPR gender gap for every model. The previous
# `'untuned' in _model` filter matched none of the labels in MODEL_NAMES, so
# this cell silently produced no figure at all.
for _model in MODEL_NAMES.values():
    plot_tpr_gap(results_df, _model)

In [38]:
# Correlation of each model's TPR gender gap with the female fraction of the
# class (same table as the one displayed before the gap columns were defined).
results_df[['frac_female'] + list(tpr_cols)].corr()[['frac_female']]


Out[38]:
frac_female
frac_female 1.000000
debiased_biosbias_tpr_gender_gap 0.829982
very_scrubbed_tpr_gender_gap 0.458378
debiased_tolga_tpr_gender_gap 0.824882
strong_debiased_1_tpr_gender_gap 0.716922
strong_no_projection_tpr_gender_gap 0.709000
strong_debiased_2_tpr_gender_gap 0.596896
strong_no_equalize_tpr_gender_gap 0.772645
glove_tpr_gender_gap 0.794059
strong_debiased_4_tpr_gender_gap 0.550435
strong_debiased_3_tpr_gender_gap 0.707174
scrubbed_tpr_gender_gap -0.282919

Gender Prediction Analysis


In [39]:
# Which model does this correspond to?
# NOTE(review): the equality test below assumes the model column holds a
# boolean / 0-1 "is male" prediction rather than a raw score -- TODO confirm.
model_name = 'tf_gru_attention_multiclass_gender_biosbias_glove:v_20190405_142640'
_is_male = gender_df['gender'] == 'M'
gender_df['correct'] = (_is_male == gender_df[model_name])

In [40]:
# Accuracy of the gender model: correct rows divided by non-null rows.
_n_correct = gender_df['correct'].sum()
_n_scored = gender_df['correct'].count()
acc = _n_correct/_n_scored
print('Accuracy: {:.4f}'.format(acc))


Accuracy: 0.8423

In [ ]: