In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%load_ext autotime
from baselines import load_comments_and_labels, assemble_data, one_hot
from ngram import test_cross,calibration_curve_plotter,two_class_roc_plotter, two_class_precision_recall_plotter
from ngram import roc_scorer, spearman_scorer
import matplotlib.pyplot as plt
import joblib
import os
from serialization import save_pipeline, load_pipeline
import pandas as pd
from sklearn.isotonic import IsotonicRegression as IR
In [7]:
def save_calibrator(task, model_name, data):
    """Fit an isotonic-regression calibrator for a trained model and save it.

    The pipeline stored as ``<model_name>_train`` is loaded from
    ``../../models/<task>/<model_name>``. It scores the 'dev' and 'test'
    splits of the randomly sampled data, and an isotonic regression is fit
    from those scores to the 'plurality' labels. The fitted calibrator is
    written with joblib to ``calibrator`` in the same directory.

    Parameters
    ----------
    task : str
        Task name; used as a directory component of the model path.
    model_name : str
        Model name; used both as a directory component and as the prefix
        of the pipeline name passed to ``load_pipeline``.
    data
        Labelled comments in whatever form ``assemble_data`` accepts
        (in this notebook, the result of ``load_comments_and_labels(task)``).

    Returns
    -------
    None
        The calibrator is persisted to disk, not returned.
    """
    results_path = '../../models/%s/%s' % (task, model_name)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(results_path, exist_ok=True)

    # Load the already-trained pipeline for this task/model pair.
    m = load_pipeline(results_path, model_name + "_train")

    # Train the calibrator on the dev and test splits;
    # all of it comes from data sampled at random.
    X, y = assemble_data(
        data,
        'comments',
        'plurality',
        splits=['dev', 'test'],
        samples=['random'],
    )

    # Column 1 of predict_proba is used as the raw score
    # (presumably the positive class -- confirm against the pipeline).
    y_pred = m.predict_proba(X)[:, 1]

    # out_of_bounds='clip' is passed so scores outside the range seen during
    # fitting are clipped rather than rejected at prediction time.
    calibrator = IR(out_of_bounds='clip').fit(y_pred, y)
    joblib.dump(calibrator, os.path.join(results_path, 'calibrator'))
In [10]:
# Labelling tasks for which a calibrator is fit.
tasks = [
    'attack',
    'recipient',
    'aggression',
]

# Names of the trained models to calibrate for each task.
models = [
    'linear_char_ed',
    'mlp_char_ed',
    'linear_char_oh',
    'mlp_char_oh',
]
In [11]:
# Fit and save one calibrator per (task, model) pair.
for task in tasks:
    # The labelled comments are loaded once per task and shared by all models.
    data = load_comments_and_labels(task)

    for model in models:
        save_calibrator(task, model, data)
In [ ]: