Using the fatal encounters dataset, create a classifier that takes text and
other attributes as input and tries to predict the target variable Official
disposition of death (justified or other). Does the classifier learn
potentially discriminatory patterns with respect to some protected class
attribute in the dataset? And with respect to which definition of fairness?
In [7]:
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import Markdown, display
sns.set_style("white")
%matplotlib inline
In [8]:
data = pd.read_csv("data/fatal_encounters_dataset.csv")
# clean data column names
data.columns = (
    data.columns
    .str.replace("'", "")
    .str.replace("[^a-zA-Z]", "_")
    .str.replace("_+", "_")
    .str.strip("_")
    .str.lower()
    .str.strip()
)
# drop the unnamed index columns
data = data[data.columns[~data.columns.str.startswith("unnamed")]]
In [9]:
def examine(df, n_sample=3):
    """Summarize each column alongside a few sampled values."""
    return (
        df.describe(include="all").T
        [["count", "unique", "mean", "std"]]
        .merge(
            df.apply(
                lambda s: s.sample(
                    n_sample, random_state=90).reset_index(drop=True))
            .T.rename(columns={
                i: "sample_%s" % (i + 1) for i in range(n_sample)}),
            how="left", left_index=True, right_index=True))

examine(data, n_sample=2)
Out[9]:
In [10]:
# TARGET VARIABLE
JUSTIFIED = "official_disposition_of_death_justified_or_other"
# Features of interest
SENSITIVE_ATTRIBUTES = [
"subjects_name",
"subjects_age",
"subjects_gender",
"subjects_race",
"url_of_image_of_deceased",
"symptoms_of_mental_illness"
]
FEATURES = [
"agency_responsible_for_death",
"cause_of_death",
"a_brief_description_of_the_circumstances_surrounding_the_death",
"location_of_death_city",
"location_of_death_state",
"location_of_death_zip_code",
"location_of_death_county",
]
In [11]:
def plot_categorical(s, top_n=15, **kwargs):
    ax = s.value_counts().sort_values().tail(top_n).plot.barh(**kwargs)
    ax.set_xlabel("frequency")
    sns.despine()
    return ax

plot_categorical(data[JUSTIFIED], figsize=(8, 7));
In [12]:
JUSTIFIED_STRINGS = [
    "Justified",
    # the misspelled variants below appear verbatim in the raw data
    "Justifed",
    "Jusified",
    "Justified by internal review",
    "Justified by outside agency",
    "Justified by District Attorney",
    "Other justified (Civilian board/Prosecutor/District Attorney/Coroner)"
]
UNKNOWN_STRINGS = [
    "Unreported",
    "Unknown",
]

RACE = "subjects_race"
GENDER = "subjects_gender"

def encode_target(s):
    if pd.isnull(s):
        return "UNKNOWN"
    s = s.strip()
    if s in JUSTIFIED_STRINGS:
        return "JUSTIFIED"
    elif s in UNKNOWN_STRINGS:
        return "UNKNOWN"
    else:
        return "OTHER"
gender_encoding_map = {
    "Female": "FEMALE",
    "Femalr": "FEMALE",  # misspelling present in the raw data
    "Transgender": "TRANSGENDER",
    "Male": "MALE",
}

race_encoding_map = {
    "Race unspecified": "RACE_UNSPECIFIED",
    "European-American/White": "WHITE",
    "African-American/Black": "BLACK",
    "Hispanic/Latino": "LATINO",
    "Asian/Pacific Islander": "ASIAN_PACIFIC_ISLANDER",
    "Native American/Alaskan": "NATIVE_AMERICAN_ALASKAN",
    "Middle Eastern": "MIDDLE_EASTERN",
}
clean_data = data.copy()
clean_data[JUSTIFIED] = data[JUSTIFIED].map(encode_target)
clean_data[GENDER] = data[GENDER].map(gender_encoding_map)
clean_data[RACE] = data[RACE].map(race_encoding_map)

# exclude records with "UNKNOWN" disposition or "RACE_UNSPECIFIED" race
clean_data = clean_data[clean_data[JUSTIFIED] != "UNKNOWN"]
clean_data = clean_data[clean_data[RACE] != "RACE_UNSPECIFIED"]
clean_data[JUSTIFIED].value_counts().to_frame()
Out[12]:
In [13]:
clean_data.subjects_gender.value_counts().to_frame()
Out[13]:
In [14]:
clean_data.subjects_race.value_counts().to_frame()
Out[14]:
In [15]:
examine(clean_data[FEATURES])
Out[15]:
In [16]:
clean_data.cause_of_death.value_counts().to_frame()
Out[16]:
TODO: tokenize a_brief_description_of_the_circumstances_surrounding_the_death
so that the text is represented as a word vector (a sketch of one approach follows).
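Here is a minimal sketch of that TODO using a bag-of-words TF-IDF
representation, assuming scikit-learn's TfidfVectorizer and the cleaned column
name above; the max_features cutoff, the word_ prefix, and the text_features
name are illustrative choices, not part of the analysis below:

from sklearn.feature_extraction.text import TfidfVectorizer

DESCRIPTION = "a_brief_description_of_the_circumstances_surrounding_the_death"

# fit a bag-of-words TF-IDF representation of the description text;
# max_features caps the vocabulary to keep the design matrix manageable
vectorizer = TfidfVectorizer(max_features=500, stop_words="english")
text_matrix = vectorizer.fit_transform(
    clean_data[DESCRIPTION].fillna("").astype(str))

# wrap the sparse matrix in a DataFrame so it can be concatenated
# with the other features on clean_data's index
text_features = pd.DataFrame(
    text_matrix.toarray(),
    columns=["word_%s" % w for w in vectorizer.get_feature_names()],
    index=clean_data.index)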
In [17]:
from themis_ml.metrics import mean_difference

def report_mean_difference(y, s_list):
    report = []
    index = []
    for s_name, s in s_list:
        s_notnull = s.notnull()
        # express the mean difference and its confidence bounds in percent
        report.append(
            [x * 100 for x in mean_difference(y[s_notnull], s[s_notnull])])
        index.append("{s_name} vs. NOT {s_name}".format(s_name=s_name))
    return pd.DataFrame(
        report, columns=["mean difference", "lower bound", "upper bound"],
        index=index)

is_justified = clean_data[JUSTIFIED] == "JUSTIFIED"
gender_vectors = [
    (g, (clean_data.subjects_gender == g).astype(int))
    for g in clean_data.subjects_gender.dropna().unique()]
gender_report = report_mean_difference(is_justified, gender_vectors)
gender_report
Out[17]:
In [18]:
def plot_report(report):
    margin = (report["mean difference"] - report["lower bound"]).abs()
    ax = report[["mean difference"]].plot(
        kind="barh", xerr=margin, legend=False)
    ax.axvline(0, color="k")
    ax.set_xlabel("mean difference")
    sns.despine(bottom=True, left=True)

plot_report(gender_report)
If the mean difference is negative with respect to some sensitive attribute
value $s \in \{d, a\}$ and some outcome $y \in \{y^{+}, y^{-}\}$, it implies
that members of the putatively disadvantaged class $d$ experience the
beneficial outcome $y^{+}$ more often than the advantaged class $a$.
Conversely, if the mean difference is positive, it implies that members of
the putatively disadvantaged class $d$ experience the harmful outcome
$y^{-}$ more often than the advantaged class $a$.
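Concretely, for a binary outcome the mean difference is just the gap in
beneficial-outcome rates between the two groups. Assuming the usual
convention (which I believe themis_ml follows) that $s = 1$ marks the
disadvantaged group $d$ and $s = 0$ the advantaged group $a$:

$$\mathrm{MD}(y, s) = p(y^{+} \mid s = a) - p(y^{+} \mid s = d)$$

so a positive mean difference means the advantaged group receives the
beneficial outcome at a higher rate.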
Interestingly, MALE subjects experience JUSTIFIED fatal encounters more often
than their NON-MALE counterparts.
In [19]:
race_vectors = [
    (r, (clean_data.subjects_race == r).astype(int))
    for r in clean_data.subjects_race.dropna().unique()]
race_report = report_mean_difference(is_justified, race_vectors)
race_report
Out[19]:
In [20]:
plot_report(race_report)
In [21]:
mental_illness_vectors = [
    (r, (clean_data.symptoms_of_mental_illness == r).astype(int))
    for r in clean_data.symptoms_of_mental_illness.dropna().unique()]
mental_illness_report = report_mean_difference(
    is_justified, mental_illness_vectors)
mental_illness_report
Out[21]:
In [22]:
plot_report(mental_illness_report)
Interestingly, MALE and BLACK subjects experience JUSTIFIED fatal encounters
more often than their NON-MALE and NON-BLACK counterparts, respectively.
This leads me to suspect that the official_disposition_of_death_justified_or_other
labels are somehow skewed against these two sensitive attribute values.
WHO LABELLED THESE RECORDS?
In [23]:
import itertools

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from themis_ml.linear_model import LinearACFClassifier
In [24]:
FAIRNESS_UNAWARE_FEATURES = [
    ("subjects_age", "NUMERIC"),
    ("subjects_gender", "CATEGORICAL"),
    ("subjects_race", "CATEGORICAL"),
    ("symptoms_of_mental_illness", "CATEGORICAL"),
    ("agency_responsible_for_death", "CATEGORICAL"),
    ("cause_of_death", "CATEGORICAL"),
    ("location_of_death_city", "CATEGORICAL"),
    ("location_of_death_state", "CATEGORICAL"),
    ("location_of_death_zip_code", "CATEGORICAL"),
    ("location_of_death_county", "CATEGORICAL"),
]

training_data = []
for feature, dtype in FAIRNESS_UNAWARE_FEATURES:
    if dtype == "NUMERIC":
        # strip non-digit characters, coerce unparseable values to NaN,
        # then mean-impute
        f = pd.to_numeric(
            clean_data[feature].str.replace("[^0-9]", ""), errors="coerce")
        training_data.append(f.where(f.notnull(), f.mean()))
    elif dtype == "CATEGORICAL":
        # one-hot encode, treating missing values as their own "NULL" level
        training_data.append(
            pd.get_dummies(clean_data[[feature]].fillna("NULL")))

training_data = pd.concat(training_data, axis=1)
features = training_data.columns
training_data = training_data.assign(
    target=(clean_data[JUSTIFIED] == "JUSTIFIED").astype(int))
assert training_data.notnull().all().all()
training_data.head()
Out[24]:
In [32]:
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=10)
estimators = [
    ("logistic_regression", LogisticRegression()),
    ("linear_acf", LinearACFClassifier()),
]

X = training_data[features].values
y = training_data["target"].values
s = training_data["subjects_race_BLACK"].values

# stratify folds jointly on the target and the sensitive attribute
strata = training_data["target"].astype(int).astype(str).str.cat(
    training_data["subjects_race_BLACK"].astype(int).astype(str), sep="_")

preds = []
for i, (train, test) in enumerate(cv.split(X, strata, groups=strata)):
    print(".", end="")
    X_train, X_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    s_train, s_test = s[train], s[test]
    for est_name, estimator in estimators:
        # the linear ACF model needs the sensitive attribute at fit and
        # predict time; plain logistic regression does not
        fit_args = (X_train, y_train, s_train) if est_name == "linear_acf" \
            else (X_train, y_train)
        predict_args = (X_test, s_test) if est_name == "linear_acf" \
            else (X_test, )
        estimator.fit(*fit_args)
        preds.append(
            pd.DataFrame({
                "pred_y": estimator.predict_proba(*predict_args)[:, 1],
                "pred_label": estimator.predict(*predict_args).astype(int),
                "true_y": y_test.astype(int),
                "sensitive_attribute": s_test,
                "rep_fold": i,
                "estimator": est_name,
            }))

preds = pd.concat(preds)
In [76]:
def compute_metrics(df):
    accuracy = accuracy_score(df.true_y, df.pred_label)
    # mean_difference returns (estimate, lower bound, upper bound);
    # only the point estimate is reported here
    mean_diff, _, _ = mean_difference(df.pred_label, df.sensitive_attribute)
    return pd.Series({
        "accuracy": accuracy,
        "mean difference": mean_diff,
    })

metrics = (
    preds
    .groupby(["estimator", "rep_fold"])
    .apply(compute_metrics)
    .reset_index(0)
    .pipe(pd.melt, id_vars="estimator", var_name="metric",
          value_name="value")
)

sns.factorplot(
    x="value", y="estimator",
    hue="metric",
    row="metric",
    sharex=False,
    data=metrics,
    size=3, aspect=1.5,
    join=False);
In [79]:
(
    metrics
    .groupby(["metric", "estimator"])
    .agg([np.mean, np.std]))
Out[79]: