In [1]:
import numpy as np
import pandas as pd
from ipywidgets import FloatProgress
from IPython.display import display
import os
import sys
# Put the sibling ``../lstm`` directory (resolved against the current working
# directory) on the import path, unless it is already there, so the
# ``evaluate`` import that follows can be resolved -- presumably that is where
# the module lives.
module_path = os.path.abspath('../lstm')
if module_path not in sys.path:
    sys.path.append(module_path)
from evaluate import *
In [2]:
# Number of bootstrap resamples: used as the iteration count of the resampling
# loop, the maximum of its progress bar, and the denominator of the final
# reported fraction.
num_samples = 10000
In [3]:
# Paired bootstrap comparison of two prediction files.
#
# Each iteration resamples the rows of the combined-model predictions with
# replacement, selects the rows with the same index labels from the baseline
# predictions, scores both resamples with ``Evaluator(...).micro()`` and
# records whether the experimental score is strictly higher.  The cell's value
# is the fraction of resamples in which the experimental system was NOT
# strictly better than the baseline.
combined = "./combined_prediction.tsv"
baseline = "./SVM+L1.tsv"

# ``DataFrame.from_csv`` no longer exists in current pandas; ``read_csv`` with
# the same arguments gives the same default positional index.
cdf = pd.read_csv(combined, sep='\t', index_col=False)
bdf = pd.read_csv(baseline, sep='\t', index_col=False)

# Rows are paired by index label below, so the two files must at least have
# the same number of rows.
# NOTE(review): this assumes both files list the same examples in the same
# order -- confirm against how the TSVs are produced.
if len(cdf) != len(bdf):
    raise ValueError(
        "prediction files differ in length: %d rows in %s vs %d rows in %s"
        % (len(cdf), combined, len(bdf), baseline)
    )

# Seed the resampling so a fresh "Restart & Run All" reproduces the same value.
bootstrap_seed = 0
rng = np.random.RandomState(bootstrap_seed)

prog = FloatProgress(min=0, max=num_samples)
display(prog)

# p[i] is 1 when the experimental system wins on resample i, else 0.
p = np.zeros(num_samples)
try:
    for i in range(num_samples):
        cdf_sample = cdf.sample(frac=1.0, replace=True, random_state=rng)
        # Index labels of the drawn rows (with repeats); reuse them so the
        # baseline resample contains exactly the same examples.
        mask = cdf_sample.index.values
        # ``.ix`` was removed from pandas; ``.loc`` is the label-based
        # equivalent and repeats rows for repeated labels.
        bdf_sample = bdf.loc[mask]
        baseline_eval = Evaluator(bdf_sample, "None")
        exp_eval = Evaluator(cdf_sample, "None")
        baseline_f1 = baseline_eval.micro()
        exp_f1 = exp_eval.micro()
        # test if experimental f1 is better than baseline for this sample
        # (ties count as "not better")
        if exp_f1 > baseline_f1:
            p[i] = 1
        prog.value += 1
finally:
    # Always dismiss the progress widget, even if an iteration raised.
    prog.close()

# Fraction of resamples where the experimental system did not beat the baseline.
1.0 - float(p.sum()) / float(num_samples)
Out[3]: