In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%load_ext autotime

from baselines import load_comments_and_labels, assemble_data, one_hot
from ngram import test_cross, calibration_curve_plotter, two_class_roc_plotter, two_class_precision_recall_plotter
from ngram import roc_scorer, spearman_scorer
import matplotlib.pyplot as plt
import joblib
import os
from serialization import save_pipeline, load_pipeline
import pandas as pd
from sklearn.isotonic import IsotonicRegression as IR


Using TensorFlow backend.

In [7]:
def save_calibrator(task, model_name, data):
    results_path = '../../models/%s/%s' % (task, model_name)
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    # load the pipeline fit on the train split
    m = load_pipeline(results_path, model_name + '_train')

    # fit an isotonic regression calibrator on the dev and test splits;
    # all of this data comes from the randomly sampled comments
    X, y = assemble_data(data, 'comments', 'plurality', splits=['dev', 'test'], samples=['random'])
    y_pred = m.predict_proba(X)[:, 1]
    joblib.dump(IR(out_of_bounds='clip').fit(y_pred, y), os.path.join(results_path, 'calibrator'))


time: 5.09 ms
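
The saved calibrator can later be reloaded next to its pipeline and applied to raw model scores. Below is a minimal sketch, assuming the same load_pipeline / assemble_data helpers and directory layout used above; load_calibrated_scores is a hypothetical helper name, not part of the repo.

In [ ]:
def load_calibrated_scores(task, model_name, data):
    # hypothetical helper: reload the trained pipeline and its isotonic
    # calibrator, then map raw P(y = 1) scores to calibrated probabilities
    results_path = '../../models/%s/%s' % (task, model_name)
    m = load_pipeline(results_path, model_name + '_train')
    calibrator = joblib.load(os.path.join(results_path, 'calibrator'))

    # the dev split (random sample) is used here purely for illustration
    X, y = assemble_data(data, 'comments', 'plurality', splits=['dev'], samples=['random'])
    y_raw = m.predict_proba(X)[:, 1]
    # IsotonicRegression with out_of_bounds='clip' keeps outputs in [0, 1]
    y_calibrated = calibrator.predict(y_raw)
    return y, y_raw, y_calibrated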

In [10]:
tasks = ['attack', 'recipient', 'aggression']
models = ['linear_char_ed', 'mlp_char_ed', 'linear_char_oh', 'mlp_char_oh']


time: 1.46 ms

In [11]:
for task in tasks:
    data = load_comments_and_labels(task)
    for model in models:
        save_calibrator(task, model, data)


time: 12min 48s
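
With calibrators saved for every task/model pair, a reliability diagram is a quick sanity check. Below is a minimal sketch using scikit-learn's calibration_curve (the repo's own calibration_curve_plotter could be used instead); plot_reliability is a hypothetical helper, and y_true / y_calibrated are assumed to come from something like load_calibrated_scores above.

In [ ]:
from sklearn.calibration import calibration_curve

def plot_reliability(y_true, y_calibrated, n_bins=10):
    # hypothetical helper: bin the calibrated scores and compare the mean
    # predicted probability in each bin to the empirical positive rate
    prob_true, prob_pred = calibration_curve(y_true, y_calibrated, n_bins=n_bins)
    plt.plot(prob_pred, prob_true, marker='o', label='calibrated model')
    plt.plot([0, 1], [0, 1], linestyle='--', label='perfectly calibrated')
    plt.xlabel('mean predicted probability')
    plt.ylabel('fraction of positives')
    plt.legend()
    plt.show()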

In [ ]: