We want our model to accurately estimate the fraction of comments in a group that are personal attacks. In this notebook we explore both model calibration and threshold selection on the dev set, and we measure the reliability of the resulting estimates on the test set.
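The key property we rely on: if the model's probabilities are well calibrated, the mean predicted probability over a group of comments is an unbiased estimate of that group's attack rate. A minimal synthetic sketch of this property (all values below are illustrative, not project data):

import numpy as np

rng = np.random.RandomState(0)
p = rng.uniform(0, 1, size=100000)  # perfectly calibrated scores
y = rng.binomial(1, p)              # outcomes drawn at exactly those rates

# with calibrated scores, the mean score tracks the true attack rate
print('true rate:     ', y.mean())
print('estimated rate:', p.mean())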
In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from serialization import load_pipeline
from baselines import load_comments_and_labels, assemble_data
from sklearn.metrics import roc_curve, brier_score_loss, log_loss, roc_auc_score
from sklearn.calibration import CalibratedClassifierCV
In [2]:
# load data
task = 'attack'
data = load_comments_and_labels(task)
In [3]:
# load model trained on the train split
model_name = 'linear_char_ed_train'
model_type = 'linear_char_ed'
path = '../../models/%s/%s' % (task, model_type)
model = load_pipeline(path, model_name)
In [4]:
# train calibrators on the dev set; evaluation is done on the test set
# all comments come from the randomly sampled data
from sklearn.linear_model import LogisticRegression as LR
from sklearn.isotonic import IsotonicRegression as IR
X_dev, y_dev = assemble_data(data, 'comments', 'plurality', splits = ['dev'], samples = ['random'])
X_test, y_test = assemble_data(data, 'comments', 'plurality', splits = ['test'], samples = ['random'])
y_dev_pred = model.predict_proba(X_dev)[:,1]
y_test_pred = model.predict_proba(X_test)[:,1]
In [5]:
# fit a Platt-scaling calibrator (logistic regression on the raw scores)
# and an isotonic regression calibrator on the dev set
lr_calibrator = LR().fit(y_dev_pred.reshape(-1, 1), y_dev)
ir_calibrator = IR(out_of_bounds='clip').fit(y_dev_pred, y_dev)
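As an aside, sklearn's CalibratedClassifierCV (imported above but otherwise unused) packages the same procedure. A minimal sketch, assuming the loaded pipeline is an already-fitted classifier exposing predict_proba:

# hypothetical equivalent using sklearn's built-in calibration wrapper;
# cv='prefit' calibrates an already-fitted model on held-out data
calibrated_model = CalibratedClassifierCV(model, cv='prefit', method='sigmoid')
calibrated_model.fit(X_dev, y_dev)
y_test_pred_cccv = calibrated_model.predict_proba(X_test)[:, 1]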
In [6]:
y_test_pred_lr_calibrated = lr_calibrator.predict_proba(y_test_pred.reshape(-1, 1))[:, 1]
y_test_pred_ir_calibrated = ir_calibrator.transform(y_test_pred)
In [7]:
# plot the calibration transformations: raw model score vs. calibrated score
In [8]:
plt.scatter(y_test_pred, y_test_pred_lr_calibrated)  # Platt (logistic) calibration
Out[8]:
In [9]:
plt.scatter(y_test_pred, y_test_pred_ir_calibrated)  # isotonic calibration
Out[9]:
In [10]:
# compare local (windowed) Brier score across the range of predicted probabilities
df_brier = pd.DataFrame()
df_brier['predicted_prob'] = y_test_pred
df_brier['raw_error'] = (y_test - y_test_pred)**2
df_brier['ir_error'] = (y_test - y_test_pred_ir_calibrated)**2
df_brier['lr_error'] = (y_test - y_test_pred_lr_calibrated)**2
df_brier = df_brier.sort_values('predicted_prob')
df_brier.index = df_brier['predicted_prob']
# pd.rolling_mean was removed in pandas 0.18; use the rolling window API instead
df_brier = df_brier[['raw_error', 'ir_error', 'lr_error']].rolling(200).mean()
df_brier.plot()
plt.ylabel('smoothed local brier score loss')
Out[10]:
In [11]:
# see how well model scores can be used to infer attack rates for each calibrator
def plot_attack_rates(y, y_pred):
    print('brier_score_loss: ', brier_score_loss(y, y_pred))
    print('log_loss: ', log_loss(y, y_pred))
    print('roc_auc_score: ', roc_auc_score(y, y_pred))
    true_rate = y.mean()
    expected_rate = y_pred.mean()
    print('true_rate: ', true_rate, ' expected_rate: ', expected_rate)
    fpr, tpr, thresholds = roc_curve(y, y_pred)
    # fraction flagged at threshold t: P(score > t) = true_rate * TPR(t) + (1 - true_rate) * FPR(t)
    rates = [true_rate * tpr[i] + (1 - true_rate) * fpr[i] for i in range(len(thresholds))]
    plt.plot(thresholds, rates, label='threshold rate')
    plt.plot(thresholds, [true_rate] * len(rates), label='true rate')
    plt.plot(thresholds, [expected_rate] * len(rates), label='expected rate')
    plt.ylabel('attack rate')
    plt.xlabel('threshold')
    plt.ylim((true_rate - 2.0 * true_rate, true_rate + 2.0 * true_rate))
    plt.legend()
    plt.xlim(0.0, 0.6)
In [12]:
# no calibration
plot_attack_rates(y_test, y_test_pred)
In [13]:
# isotonic regression calibration
plot_attack_rates(y_test, y_test_pred_ir_calibrated)
In [14]:
# logistic regression calibration
plot_attack_rates(y_test, y_test_pred_lr_calibrated)
In [15]:
# pick the threshold on the dev set at which the fraction of comments
# flagged as attacks matches the true attack rate
true_rate = y_dev.mean()
thresholds = sorted(y_dev_pred)
In [16]:
rates = [(y_dev_pred > t).mean() for t in thresholds]
In [17]:
plt.plot(thresholds, rates)
plt.ylim((true_rate - 0.1*true_rate, true_rate + 0.1*true_rate))
plt.xlim((0.4, 0.42))
plt.plot(thresholds, [true_rate] * len(rates), label='true rate')
Out[17]:
In [18]:
# equal-rate threshold read off the plot above: at this threshold the flagged
# fraction on the dev set matches the true attack rate
dev_er_t = 0.408
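Rather than eyeballing the plot, the equal-rate threshold can also be computed directly; a small sketch using the dev-set quantities defined above (dev_er_t_auto is an illustrative name):

# threshold whose flagged fraction is closest to the true dev attack rate
diffs = [abs((y_dev_pred > t).mean() - true_rate) for t in thresholds]
dev_er_t_auto = thresholds[int(np.argmin(diffs))]
print(dev_er_t_auto)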
In [19]:
y_test.mean()
Out[19]:
In [20]:
(y_test_pred > dev_er_t).mean()  # flagged fraction on test at the dev-set threshold
Out[20]:
In [22]:
from stats_utils import bayesian_rate_interval
# Bayesian credible interval for the test-set attack rate
num_attacks = y_test.sum()
num_comments = y_test.shape[0]
bayesian_rate_interval(num_attacks, num_comments, conf=95, plot=True)
Out[22]:
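bayesian_rate_interval is project code; an equivalent interval can be sketched directly with scipy, under the assumption of a uniform Beta(1, 1) prior on the rate:

from scipy.stats import beta

# posterior over the attack rate given num_attacks successes in num_comments trials
posterior = beta(num_attacks + 1, num_comments - num_attacks + 1)
print(posterior.interval(0.95))  # central 95% credible interval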
In [29]:
# compare three per-comment estimators of the attack rate:
#   'true':      crowd label
#   'threshold': hard prediction at the dev equal-rate threshold
#   'sum':       calibrated probability (averaging these estimates the rate)
df_sub = pd.DataFrame()
df_sub['true'] = y_test
df_sub['threshold'] = (y_test_pred > dev_er_t).astype(int)
df_sub['sum'] = y_test_pred_ir_calibrated
In [30]:
n = df_sub.shape[0]
In [31]:
# group sizes to evaluate, from single comments up to the full test set
ks = [1, 8, 64, int(n/64), int(n/8), n]
In [32]:
# for each group size k, draw 1000 bootstrap samples (with replacement)
# and estimate the group attack rate with each estimator
results_list = []
for k in ks:
    for i in range(1000):
        idxs = np.random.choice(n, k)
        df_temp = df_sub.loc[idxs]
        result_dict = {
            'true': df_temp['true'].mean(),
            'sum': df_temp['sum'].mean(),
            'threshold': df_temp['threshold'].mean(),
            'k': k,
        }
        results_list.append(result_dict)
df_results = pd.DataFrame(results_list)
In [33]:
# convert the rate columns to percentages; leave the group size k unscaled
df_results[['true', 'sum', 'threshold']] = df_results[['true', 'sum', 'threshold']] * 100
In [34]:
df_results.query('k==1')['true'].hist()
Out[34]:
In [35]:
df_results.groupby(['k']).mean()
Out[35]:
In [36]:
df_results.groupby(['k']).std()
Out[36]:
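As a sanity check on the bootstrap spread of the 'true' estimator: since samples are drawn with replacement, the standard deviation of a size-k sample mean should be close to the analytic binomial value sqrt(p(1-p)/k). A quick sketch, assuming p is the test-set attack rate:

p = y_test.mean()
for k in ks:
    # expected std of the 'true' estimator, in percentage points
    print(k, 100 * np.sqrt(p * (1 - p) / k))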
In [25]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))
from analysis.load_utils import load_diffs
In [27]:
d = load_diffs()
In [32]:
# join model scores onto the randomly sampled test comments
df_test = pd.read_csv('../../data/annotations/split/test/annotations.tsv', sep='\t')
df_test = df_test.query("sample=='random'").drop_duplicates(subset='rev_id')[['rev_id', 'clean_diff']]
df_test = d['annotated'].merge(df_test, how='inner', on='rev_id')
df_test['pred_attack'] = (df_test['pred_attack_score_uncalibrated'] > dev_er_t).astype(int)
In [168]:
# stack crowd labels and model predictions into one frame for the comparison
# plots below (note: this rebinds d, which previously held the load_diffs output)
d1 = df_test[['ns', 'year', 'author_anon', 'user_id', 'user_text', 'clean_diff']].copy()
d1['source'] = 'crowd'
d1['attack'] = df_test.attack * 100
d2 = df_test[['ns', 'year', 'author_anon', 'user_id', 'user_text', 'clean_diff']].copy()
d2['source'] = 'model'
d2['attack'] = df_test.pred_attack * 100
d = pd.concat([d1, d2])
d['logged_in'] = d['author_anon'].apply(lambda x: not x)
d['thank'] = d['clean_diff'].apply(lambda x: 'thank' in x.lower())
In [170]:
sns.set(font_scale=4)
g = sns.pointplot(data=d, x='source', y='attack', dodge=0.15, capsize=.1, linestyles=[""])
plt.ylabel('Attack percentage')
plt.xlabel('Label source')
plt.savefig('../../paper/figs/sanity_check.png',bbox_inches='tight')
In [171]:
# duplicate the data with an explicit x column so that both hue levels can be
# forced to appear at each x position in the plot below
d_tmp1 = d.query("source == 'crowd'").copy()
d_tmp1['x'] = 'crowd'
d_tmp2 = d.query("source == 'model'").copy()
d_tmp2['x'] = 'model'
In [172]:
# add two dummy rows so that each x position has both source levels; use .loc
# instead of chained assignment, which pandas may silently drop
d_tmp3 = d[:2].copy()
d_tmp3['x'] = 'crowd'
d_tmp3.loc[d_tmp3.index[0], ['attack', 'x']] = [0.7, 'model']
d_tmp3.loc[d_tmp3.index[1], ['attack', 'x']] = [0.7, 'crowd']
In [173]:
d_temp = pd.concat([d_tmp1, d_tmp2, d_tmp3])
In [174]:
d_temp['x'].value_counts()
Out[174]:
In [176]:
ax = sns.pointplot(data=d_temp, x='x', y='attack', hue='source', capsize=.1, linestyles=[" ", ""])
ax.legend_.remove()
plt.ylim(0.8, 1.4)
plt.ylabel('Attack percentage')
plt.xlabel('Label source')
plt.savefig('../../paper/figs/sanity_check.png',bbox_inches='tight')
In [177]:
ax = sns.pointplot(data=d, x='ns', y='attack', hue='source', linestyles=[" ", ""], dodge=0.15, capsize=.1)
plt.xlabel('Namespace')
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.savefig('../../paper/figs/sanity_check_ns.png',bbox_inches='tight')
In [178]:
ax = sns.pointplot(data=d, x='logged_in', y='attack', hue='source', linestyles=[" ", ""], dodge=0.15, capsize=.1)
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.xlabel('Author Logged-in')
plt.savefig('../../paper/figs/sanity_check_anon.png', bbox_inches='tight')
In [179]:
def map_year(y):
    if y < 2010:
        return 'before 2010'
    else:
        return 'after 2010'

d['com_year'] = d.year.apply(map_year)
ax = sns.pointplot(data=d, x='com_year', y='attack', hue='source', dodge=0.15, capsize=.1, linestyles=[" ", ""], order=['before 2010', 'after 2010'])
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.xlabel('Year')
plt.savefig('../../paper/figs/sanity_check_year.png',bbox_inches='tight')
In [180]:
d_gender = pd.read_csv('../../data/genders.tsv', sep='\t')
d_gender = d.merge(d_gender, how='left', on='user_id').query('not author_anon')
d_gender.gender.fillna('unknown', inplace=True)
ax = sns.pointplot(data=d_gender, x='gender', y='attack', hue='source', dodge=0.15, capsize=.1, linestyles=[" ", ""])
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.xlabel('Gender')
plt.savefig('../../paper/figs/sanity_check_gender.png', bbox_inches='tight')
In [181]:
d_activity = pd.read_csv('../../data/long_term_users.tsv', sep='\t')
d_activity = d.merge(d_activity, how='left', on='user_text').query('not author_anon')
d_activity.num_days.fillna(0, inplace=True)

# bucket users by number of active days
def map_activity(a):
    if a < 8:
        return '0-7'
    elif a < 31:
        return '8-30'
    else:
        return '30+'

d_activity['Days active'] = d_activity.num_days.apply(map_activity)
ax = sns.pointplot(data=d_activity, x='Days active', y='attack', hue='source', dodge=0.15, capsize=.1, linestyles=[" ", ""], order=['0-7', '8-30', '30+'])
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.savefig('../../paper/figs/sanity_check_activity.png',bbox_inches='tight')
In [182]:
ax = sns.pointplot(data=d_activity, x='thank', y='attack', hue='source', dodge=0.15, capsize=.1, linestyles=[" ", ""])
ax.legend_.remove()
plt.ylabel('Attack percentage')
plt.xlabel("Contains word 'thank'")
plt.savefig('../../paper/figs/sanity_check_thank.png',bbox_inches='tight')
In [195]:
# re-score dev+test using the empirical distribution of annotator labels
X_test, y_test_oh = assemble_data(data, 'comments', 'empirical_dist', splits=['test', 'dev'], samples=['random'])
y_test_pred = model.predict_proba(X_test)
In [204]:
from ngram import two_class_combo_plotter
sns.set(font_scale=1.5)
two_class_combo_plotter(y_test_oh, y_test_pred)
plt.xlim(0.41, 0.445)
plt.ylim(0.6, 0.7)
Out[204]: