In [1]:
import numpy as np
import pandas as pd
from ipywidgets import FloatProgress
from IPython.display import display

import os
import sys

# Put the sibling ``lstm`` directory on the import path (presumably where the
# project-local ``evaluate`` module imported below lives -- confirm).
module_path = os.path.abspath('../lstm')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
from evaluate import *

In [2]:
# Number of bootstrap resamples to draw; the loop count, the result array
# and the progress bar below are all sized from this value.
num_samples = 10000

In [3]:
# Paired bootstrap comparison of two prediction files: the "combined"
# (experimental) system versus the SVM+L1 baseline.
#
# On every iteration the experimental rows are resampled with replacement and
# the baseline rows with the *same* labels are selected, so both systems are
# scored on an identical resample.
# NOTE(review): this assumes both files list the same examples in the same
# order -- confirm.
combined = "./combined_prediction.tsv"
baseline = "./SVM+L1.tsv"

# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv` is its documented replacement and accepts the same arguments.
cdf = pd.read_csv(combined, sep='\t', index_col=False)
bdf = pd.read_csv(baseline, sep='\t', index_col=False)

# One seeded generator shared by all iterations: every resample is different,
# but the whole run (and therefore the reported value) is reproducible.
rng = np.random.RandomState(42)

prog = FloatProgress(min=0, max=num_samples)
display(prog)

# p[i] is 1 when the experimental system strictly beats the baseline on
# resample i, and 0 otherwise.
p = np.zeros(num_samples)

for i in range(num_samples):
    cdf_sample = cdf.sample(frac=1.0, replace=True, random_state=rng)
    # Row labels drawn for this resample (duplicates are expected).
    mask = cdf_sample.index.values
    # `.ix` was removed in pandas 1.0; `.loc` performs the same label-based
    # lookup and raises KeyError if the baseline lacks a sampled label.
    bdf_sample = bdf.loc[mask]

    # NOTE(review): the second argument is the string "None", not the None
    # object -- confirm this is what Evaluator expects.
    baseline_eval = Evaluator(bdf_sample, "None")
    exp_eval = Evaluator(cdf_sample, "None")

    baseline_f1 = baseline_eval.micro()
    exp_f1 = exp_eval.micro()
    # test if experimental f1 is better than baseline for this sample
    # (strict comparison: a tie counts against the experimental system)
    if exp_f1 > baseline_f1:
        p[i] = 1
    prog.value += 1

prog.close()

# Fraction of resamples in which the experimental system did NOT strictly
# beat the baseline; left as the cell's last expression so it is displayed.
1.0 - float(p.sum()) / float(num_samples)


Out[3]:
0.018199999999999994