Baselines


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%load_ext autotime

from baselines import get_annotator_ensemble_baselines_parallel, get_annotator_ensemble_baseline, get_model_baselines_parallel, get_model_baseline
from baselines import multi_class_roc_auc, multi_class_spearman
from baselines import empirical_dist
from ngram import load_comments_and_labels, assemble_data
from pprint import pprint
import pandas as pd
import numpy as np
from serialization import load_pipeline
import os


Using TensorFlow backend.

In [2]:
iters = 25      # repetitions of each baseline configuration
K = 20          # annotations per comment used by the baseline draws
F = int(K/2)    # largest group size that keeps two annotator groups disjoint


time: 2.52 ms

In [3]:
annotations = pd.read_csv('../../data/annotations/split/baseline/annotations.tsv', sep='\t')
annotations.index = annotations.rev_id
comments = annotations.drop_duplicates('rev_id')['clean_diff']


time: 2.08 s
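
Each row of annotations.tsv appears to be one annotator's label for a single revision: rev_id identifies the comment, clean_diff holds its text (hence the drop_duplicates above), task columns such as attack hold the per-annotator labels, and sample records whether the comment came from the blocked or random sample.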

In [4]:
metrics = {'ROC': multi_class_roc_auc, 'spearman': multi_class_spearman}
tasks = ['attack']
#tasks = ['attack', 'recipient', 'aggression']
annotations_subsets = {'all': annotations}
#annotations_subsets = {
#    'all': annotations,
#    'blocked': annotations.query("sample=='blocked'"),
#    'random': annotations.query("sample=='random'")
#}


time: 4.53 ms
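
multi_class_roc_auc and multi_class_spearman come from baselines.py and are not shown here. As a rough sketch only (not the actual implementations), metrics of this kind typically score a predicted label distribution against an empirical annotator distribution per comment; the function names and argument shapes below are illustrative assumptions:

import numpy as np
from scipy.stats import spearmanr
from sklearn.metrics import roc_auc_score

# Hypothetical sketches, NOT the implementations in baselines.py.
# Both assume y_true and y_pred are (n_comments, n_classes) arrays or DataFrames,
# each row a probability distribution over the label classes.
def multi_class_roc_auc_sketch(y_true, y_pred):
    # One-vs-rest ROC AUC per class, averaged; the "true" class of a comment
    # is taken to be the argmax of the annotator distribution.
    true_class = np.asarray(y_true).argmax(axis=1)
    y_pred = np.asarray(y_pred)
    aucs = []
    for c in range(y_pred.shape[1]):
        positives = (true_class == c)
        if positives.sum() in (0, len(positives)):
            continue  # AUC is undefined if a class never (or always) occurs
        aucs.append(roc_auc_score(positives, y_pred[:, c]))
    return float(np.mean(aucs))

def multi_class_spearman_sketch(y_true, y_pred):
    # Spearman correlation between the expected label values under the two
    # distributions, treating the classes as an ordinal scale 0..n_classes-1.
    classes = np.arange(np.asarray(y_pred).shape[1])
    return spearmanr(np.asarray(y_true) @ classes, np.asarray(y_pred) @ classes).correlation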

Get annotator ensemble baselines


In [33]:
# (n_t, n_p) group sizes: matched pairs (1,1)..(F,F), plus a fixed prediction group of size F scored against truth groups of size 1..F
pairs = list(zip(range(1, F+1), range(1, F+1))) + list(zip(range(1, F+1), [F]*F))


time: 1.45 ms
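
get_annotator_ensemble_baselines_parallel runs one baseline draw per entry of args below. The draw itself is implemented in baselines.py; the following is only a minimal sketch of what it presumably does, assuming each comment's annotators are shuffled and split into disjoint "truth" and "prediction" groups whose empirical label distributions are scored against each other (all names are illustrative):

import numpy as np
import pandas as pd

# Illustrative sketch of a single annotator-ensemble baseline draw; the real
# logic is in get_annotator_ensemble_baseline and may differ in its details.
def ensemble_baseline_draw_sketch(task_annotations, n_t, n_p, metric, n_classes=2):
    # task_annotations: per-annotator labels (integers 0..n_classes-1) indexed
    # by rev_id, with at least n_t + n_p annotations per rev_id.
    truth_rows, pred_rows = [], []
    for rev_id, labels in task_annotations.groupby(level=0):
        shuffled = np.random.permutation(labels.values)
        truth, pred = shuffled[:n_t], shuffled[n_t:n_t + n_p]
        # Empirical label distributions of the two disjoint annotator groups.
        truth_rows.append(np.bincount(truth.astype(int), minlength=n_classes) / n_t)
        pred_rows.append(np.bincount(pred.astype(int), minlength=n_classes) / n_p)
    return metric(np.array(truth_rows), np.array(pred_rows))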

In [34]:
dfs = []

for task in tasks:
    for annotations_subset_name, annotations_subset in annotations_subsets.items():
        task_annotations  = annotations_subset[task]
        for metric_name, metric in metrics.items():
            args = [[task_annotations, K, empirical_dist, metric, n_t, n_p] for n_t, n_p in pairs] * iters
            result = get_annotator_ensemble_baselines_parallel(args)
            result['metric'] = metric_name
            result['task'] = task
            dfs.append(result)
ensemble_baseline_results = pd.concat(dfs)
ensemble_baseline_results['score'] = ensemble_baseline_results['score'] * 100
ensemble_baseline_results = ensemble_baseline_results.groupby(['metric', 'n_p', 'n_t', 'task'])['score'].agg(['std', 'mean', 'count'])
pd.DataFrame(ensemble_baseline_results).to_csv('baselines1.csv')


time: 1h 59min 17s

In [35]:
ensemble_baseline_results


Out[35]:
metric    n_p  n_t  task         std       mean  count
ROC         1    1  attack  0.603945  74.250538     25
ROC         2    2  attack  0.635044  89.430232     25
ROC         3    3  attack  0.465894  90.707681     25
ROC         4    4  attack  0.272224  95.227470     25
ROC         5    5  attack  0.313270  95.335467     25
ROC         6    6  attack  0.290291  97.146305     25
ROC         7    7  attack  0.188060  97.101439     25
ROC         8    8  attack  0.158457  98.096487     25
ROC         9    9  attack  0.189766  98.120313     25
ROC        10    1  attack  0.429253  88.545554     25
ROC        10    2  attack  0.345153  95.862280     25
ROC        10    3  attack  0.319777  95.499989     25
ROC        10    4  attack  0.246593  97.316654     25
ROC        10    5  attack  0.231063  97.130499     25
ROC        10    6  attack  0.182448  98.055153     25
ROC        10    7  attack  0.151837  97.817255     25
ROC        10    8  attack  0.118801  98.375502     25
ROC        10    9  attack  0.148479  98.242783     25
ROC        10   10  attack  0.124876  98.532701     50
spearman    1    1  attack  1.014905  48.204331     25
spearman    2    2  attack  0.674873  58.072047     25
spearman    3    3  attack  0.616251  62.270098     25
spearman    4    4  attack  0.565034  64.881500     25
spearman    5    5  attack  0.675606  66.466298     25
spearman    6    6  attack  0.726201  67.698623     25
spearman    7    7  attack  0.551162  68.912229     25
spearman    8    8  attack  0.439214  69.652008     25
spearman    9    9  attack  0.441514  70.511064     25
spearman   10    1  attack  0.796424  53.587828     25
spearman   10    2  attack  0.532865  61.457489     25
spearman   10    3  attack  0.440277  64.755400     25
spearman   10    4  attack  0.505699  66.814374     25
spearman   10    5  attack  0.468124  68.271294     25
spearman   10    6  attack  0.539926  69.214320     25
spearman   10    7  attack  0.600601  69.863408     25
spearman   10    8  attack  0.404351  70.362734     25
spearman   10    9  attack  0.449219  70.977329     25
spearman   10   10  attack  0.360211  71.111486     50
time: 70.5 ms
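
Both metrics improve steadily with ensemble size; with a ten-annotator prediction ensemble (n_p = 10) scored against ten held-out annotators (n_t = 10), the human baseline reaches roughly 98.5 ROC AUC and 71.1 Spearman.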

Get model baseline


In [5]:
dfs = []
cv_path = '../../models/attack'
model_type = 'linear_char_ed'
model_name = 'linear_char_ed_train'

for task in tasks:
    # NOTE: the pipeline path below is specific to the attack model; it would
    # need to vary with task if more tasks were added to the list above.
    model = load_pipeline(os.path.join(cv_path, model_type), model_name)
    y_baseline_pred = model.predict_proba(comments)
    y_baseline_pred = pd.DataFrame(y_baseline_pred, index=comments.index)

    for annotations_subset_name, annotations_subset in annotations_subsets.items():
        task_annotations  = annotations_subset[task]
        for metric_name, metric in metrics.items():
            args = [[y_baseline_pred, task_annotations, K, empirical_dist, metric, F]] * iters
            result = get_model_baselines_parallel(args)
            result['metric'] = metric_name
            result['task'] = task
            dfs.append(result)
model_baseline_results = pd.concat(dfs)
model_baseline_results['score'] = model_baseline_results['score'] * 100
model_baseline_results = model_baseline_results.groupby(['metric', 'n_t', 'task'])['score'].agg(['std', 'mean', 'count'])
pd.DataFrame(model_baseline_results).to_csv('baselines2.csv')


time: 3min 51s
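
get_model_baseline is also defined in baselines.py and not shown here. A hedged sketch of what each draw presumably does, assuming the model's predicted class probabilities are scored against the empirical distribution of a random group of n_t held-out annotators per comment (all names below are illustrative):

import numpy as np
import pandas as pd

# Illustrative sketch of one model-baseline draw; the actual routine is
# get_model_baseline in baselines.py and may differ in its details.
def model_baseline_draw_sketch(y_model_pred, task_annotations, n_t, metric, n_classes=2):
    # y_model_pred: DataFrame of predicted class probabilities indexed by rev_id.
    # task_annotations: per-annotator labels (0..n_classes-1) indexed by rev_id.
    truth_rows, rev_ids = [], []
    for rev_id, labels in task_annotations.groupby(level=0):
        sampled = np.random.permutation(labels.values)[:n_t]
        truth_rows.append(np.bincount(sampled.astype(int), minlength=n_classes) / n_t)
        rev_ids.append(rev_id)
    # Score the model's distributions against the held-out annotators'
    # empirical distributions, aligned on rev_id.
    return metric(np.array(truth_rows), y_model_pred.loc[rev_ids].values)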

In [6]:
model_baseline_results


Out[6]:
metric    n_t  task         std       mean  count
ROC        10  attack  0.143543  97.194932     25
spearman   10  attack  0.436937  66.019843     25
time: 28.9 ms
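
With the same ten-annotator targets (n_t = 10), the character n-gram model scores about 97.2 ROC AUC and 66.0 Spearman, somewhat below the 98.5 and 71.1 achieved by a ten-annotator human ensemble above.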
