In [11]:
import pandas as pd
import os
import sys
module_path = os.path.abspath(os.path.join('../lstm'))
if module_path not in sys.path:
    sys.path.append(module_path)
from evaluate import *
import itertools
from ipywidgets import FloatProgress
from IPython.display import display
import operator
from collections import Counter
from math import factorial
from functools import reduce

In [2]:
column_dict = dict()
for f in os.listdir("."):
    if f.endswith(".tsv") and f not in {"all_sieves.tsv", "combined_prediction.tsv", "tmp.csv"}:
        model_name = f.replace(".tsv", "")
        df = pd.DataFrame.from_csv(f, sep='\t', index_col=False)
        column_dict["Gold"] = df.Gold.values
        column_dict[model_name] = df.Predicted.values

df = pd.DataFrame(column_dict)

df.to_csv("all_sieves.tsv", sep="\t", index=False)

In [39]:
df = pd.DataFrame.from_csv('all_sieves.tsv', sep='\t', index_col=False)

# in order of precision
#sieve_order = ('RBW', 'LR+L2', 'RF', 'LR+L1', 'SVM+L1', 'SVM+L2', 'RBB', 'FLSTM', 'LSTM', 'LSTM+P', 'FLSTM+P')
sieve_order = ('RBW', 'LR+L2', 'RF')

def choose_first(row, sieves):
    for sieve in sieves:
        prediction = row[sieve]
        if prediction != "None":
            return prediction
    return "None"

df["Predicted"] = df.apply(lambda x: choose_first(x, sieve_order), axis=1)

combined = df[["Gold", "Predicted"]]
combined.to_csv('combined_prediction.tsv', sep='\t', index=False)

In [40]:
evaluator = Evaluator(combined, "None")
print(evaluator.generate_scores_df().round(2))


            Class     P     R    F1   TP   FP   FN
0  E1 precedes E2  0.63  0.45  0.53   74   43   89
1  E2 precedes E1  0.00  0.00  0.00    0    3   28
2            None  0.86  0.94  0.90  671  113   42
3           MACRO  0.32  0.23  0.26   74   46  117
4           MICRO  0.62  0.39  0.48   74   46  117

In [24]:
def calc_performance(df, sieves):
    predictions = df.apply(lambda x: choose_first(x, sieves), axis=1)
    combined = pd.DataFrame({'Gold': df.Gold.values, 'Predicted': predictions.values})
    evaluator = Evaluator(combined, "None")
    results = evaluator.generate_scores_df().round(2)
#     print(results)
    f1 = results.loc[4,"F1"]
    return (sieves, f1)

In [6]:
df = pd.DataFrame.from_csv('all_sieves.tsv', sep='\t', index_col=False)

In [12]:
def npermutations(l):
    num = factorial(len(l))
    mults = Counter(l).values()
    den = reduce(operator.mul, (factorial(v) for v in mults), 1)
    return num / den

In [29]:
# sieve_order = ('RBW', 'RBB', 'LR+L2', 'RF', 'SVM+L1', 'FLSTM', 'LSTM', 'LSTM+P', 'FLSTM+P')
# sieve_order = ('LR+L2', 'RF', 'SVM+L1', 'FLSTM', 'LSTM', 'LSTM+P', 'FLSTM+P')
sieve_order = ('RBW', 'RBB', 'LR+L2', 'RF', 'FLSTM')

total_permutations = 0
for i in range(1,len(sieve_order)+1):
    total_permutations = total_permutations + npermutations(sieve_order[0:i])

performance = set()

op = FloatProgress(min=1, max=total_permutations)
display(op)
for i in range(1,len(sieve_order)+1):
    for p in itertools.permutations(sieve_order, r=i):
        op.value = op.value + 1
        (sieves, f1) = calc_performance(df, p)
        performance.add((sieves, f1))

(best_sieves, best_f1) = max(performance, key=lambda x: x[-1])

print("optimal sieve order: {}".format(best_sieves))
print("best f1: {}".format(best_f1))


optimal sieve order: ('LR+L2', 'FLSTM', 'RF', 'RBB', 'RBW')
best f1: 0.49

In [30]:
for (sieves, f1) in performance:
    if (f1 == best_f1):
        print("{} => {}".format(sieves, f1))


('LR+L2', 'FLSTM', 'RF', 'RBB', 'RBW') => 0.49
('LR+L2', 'RBW', 'FLSTM', 'RBB', 'RF') => 0.49
('RBW', 'RF', 'LR+L2', 'FLSTM', 'RBB') => 0.49
('FLSTM', 'RBB', 'LR+L2', 'RBW', 'RF') => 0.49
('FLSTM', 'RBB', 'RF', 'LR+L2') => 0.49
('RBW', 'FLSTM', 'RF', 'LR+L2', 'RBB') => 0.49
('RBB', 'RF', 'FLSTM', 'LR+L2') => 0.49
('RBW', 'FLSTM', 'RF', 'LR+L2') => 0.49
('RBB', 'FLSTM', 'RBW', 'RF', 'LR+L2') => 0.49
('RBB', 'RBW', 'LR+L2', 'FLSTM', 'RF') => 0.49
('FLSTM', 'RF', 'RBB', 'LR+L2', 'RBW') => 0.49
('FLSTM', 'RF', 'LR+L2', 'RBB', 'RBW') => 0.49
('RF', 'RBB', 'FLSTM', 'LR+L2', 'RBW') => 0.49
('LR+L2', 'FLSTM', 'RBW', 'RF', 'RBB') => 0.49
('LR+L2', 'RBW', 'RF', 'RBB', 'FLSTM') => 0.49
('RBW', 'FLSTM', 'RBB', 'LR+L2', 'RF') => 0.49
('RBW', 'FLSTM', 'LR+L2', 'RBB', 'RF') => 0.49
('LR+L2', 'RBB', 'RF', 'FLSTM', 'RBW') => 0.49
('LR+L2', 'RBB', 'FLSTM', 'RF', 'RBW') => 0.49
('RF', 'RBB', 'FLSTM', 'RBW', 'LR+L2') => 0.49
('FLSTM', 'LR+L2', 'RF', 'RBW') => 0.49
('LR+L2', 'RF', 'RBW', 'FLSTM') => 0.49
('RBW', 'RBB', 'RF', 'LR+L2', 'FLSTM') => 0.49
('FLSTM', 'LR+L2', 'RF') => 0.49
('FLSTM', 'RBW', 'LR+L2', 'RF', 'RBB') => 0.49
('FLSTM', 'RBB', 'RF', 'LR+L2', 'RBW') => 0.49
('RBB', 'LR+L2', 'RBW', 'FLSTM', 'RF') => 0.49
('RF', 'LR+L2', 'FLSTM', 'RBW') => 0.49
('LR+L2', 'FLSTM', 'RBW', 'RBB', 'RF') => 0.49
('LR+L2', 'FLSTM', 'RBB', 'RBW', 'RF') => 0.49
('RBB', 'LR+L2', 'RF', 'RBW', 'FLSTM') => 0.49
('LR+L2', 'FLSTM', 'RF') => 0.49
('FLSTM', 'RBB', 'RBW', 'RF', 'LR+L2') => 0.49
('RF', 'FLSTM', 'RBW', 'LR+L2', 'RBB') => 0.49
('RBW', 'RF', 'LR+L2', 'FLSTM') => 0.49
('RF', 'FLSTM', 'LR+L2', 'RBW', 'RBB') => 0.49
('RBW', 'LR+L2', 'RF', 'FLSTM', 'RBB') => 0.49
('RF', 'LR+L2', 'RBW', 'FLSTM') => 0.49
('RBW', 'LR+L2', 'FLSTM', 'RF', 'RBB') => 0.49
('RBW', 'LR+L2', 'RBB', 'FLSTM', 'RF') => 0.49
('RBB', 'RBW', 'RF', 'LR+L2', 'FLSTM') => 0.49
('RBB', 'FLSTM', 'RBW', 'LR+L2', 'RF') => 0.49
('RBB', 'FLSTM', 'LR+L2', 'RBW', 'RF') => 0.49
('LR+L2', 'RF', 'FLSTM', 'RBB', 'RBW') => 0.49
('LR+L2', 'RF', 'RBB', 'FLSTM') => 0.49
('RF', 'LR+L2', 'RBW', 'FLSTM', 'RBB') => 0.49
('RF', 'RBW', 'RBB', 'LR+L2', 'FLSTM') => 0.49
('RF', 'RBW', 'FLSTM', 'LR+L2') => 0.49
('RF', 'RBW', 'LR+L2', 'RBB', 'FLSTM') => 0.49
('RF', 'LR+L2', 'FLSTM', 'RBB', 'RBW') => 0.49
('RBW', 'RF', 'RBB', 'FLSTM', 'LR+L2') => 0.49
('FLSTM', 'RF', 'RBW', 'LR+L2') => 0.49
('FLSTM', 'RBW', 'RF', 'RBB', 'LR+L2') => 0.49
('LR+L2', 'RF', 'FLSTM') => 0.49
('RBB', 'FLSTM', 'LR+L2', 'RF', 'RBW') => 0.49
('FLSTM', 'LR+L2', 'RF', 'RBW', 'RBB') => 0.49
('RBB', 'FLSTM', 'LR+L2', 'RF') => 0.49
('RF', 'FLSTM', 'LR+L2', 'RBB') => 0.49
('RF', 'RBW', 'FLSTM', 'LR+L2', 'RBB') => 0.49
('FLSTM', 'RBB', 'RBW', 'LR+L2', 'RF') => 0.49
('RF', 'RBW', 'LR+L2', 'FLSTM') => 0.49
('RBB', 'RF', 'LR+L2', 'FLSTM', 'RBW') => 0.49
('RBW', 'RBB', 'RF', 'FLSTM', 'LR+L2') => 0.49
('RBW', 'RBB', 'FLSTM', 'RF', 'LR+L2') => 0.49
('RBW', 'RF', 'RBB', 'LR+L2', 'FLSTM') => 0.49
('RBW', 'RF', 'LR+L2', 'RBB', 'FLSTM') => 0.49
('RBB', 'RF', 'FLSTM', 'RBW', 'LR+L2') => 0.49
('RF', 'RBB', 'LR+L2', 'FLSTM') => 0.49
('RBW', 'RBB', 'LR+L2', 'FLSTM', 'RF') => 0.49
('RF', 'LR+L2', 'FLSTM', 'RBB') => 0.49
('RF', 'FLSTM', 'RBB', 'LR+L2') => 0.49
('RBW', 'LR+L2', 'RF', 'FLSTM') => 0.49
('LR+L2', 'RBB', 'RBW', 'FLSTM', 'RF') => 0.49
('FLSTM', 'LR+L2', 'RBB', 'RF') => 0.49
('FLSTM', 'RF', 'RBW', 'RBB', 'LR+L2') => 0.49
('LR+L2', 'RBB', 'RF', 'RBW', 'FLSTM') => 0.49
('RBW', 'FLSTM', 'RBB', 'RF', 'LR+L2') => 0.49
('RBB', 'RBW', 'RF', 'FLSTM', 'LR+L2') => 0.49
('RBB', 'RBW', 'FLSTM', 'RF', 'LR+L2') => 0.49
('FLSTM', 'RF', 'RBW', 'LR+L2', 'RBB') => 0.49
('FLSTM', 'RF', 'LR+L2', 'RBW', 'RBB') => 0.49
('LR+L2', 'RF', 'FLSTM', 'RBW', 'RBB') => 0.49
('RBW', 'LR+L2', 'RBB', 'RF', 'FLSTM') => 0.49
('RF', 'FLSTM', 'RBW', 'LR+L2') => 0.49
('FLSTM', 'LR+L2', 'RF', 'RBB') => 0.49
('RF', 'FLSTM', 'RBW', 'RBB', 'LR+L2') => 0.49
('LR+L2', 'FLSTM', 'RBW', 'RF') => 0.49
('RF', 'RBW', 'RBB', 'FLSTM', 'LR+L2') => 0.49
('RF', 'FLSTM', 'RBB', 'RBW', 'LR+L2') => 0.49
('LR+L2', 'FLSTM', 'RBB', 'RF', 'RBW') => 0.49
('RBW', 'LR+L2', 'FLSTM', 'RF') => 0.49
('RF', 'LR+L2', 'RBB', 'FLSTM') => 0.49
('LR+L2', 'RBW', 'RBB', 'FLSTM', 'RF') => 0.49
('FLSTM', 'LR+L2', 'RBW', 'RBB', 'RF') => 0.49
('FLSTM', 'LR+L2', 'RBB', 'RBW', 'RF') => 0.49
('RBW', 'RF', 'FLSTM', 'LR+L2', 'RBB') => 0.49
('RBW', 'FLSTM', 'LR+L2', 'RF', 'RBB') => 0.49
('FLSTM', 'RBB', 'LR+L2', 'RF') => 0.49
('LR+L2', 'RBB', 'RF', 'FLSTM') => 0.49
('RBB', 'FLSTM', 'RF', 'RBW', 'LR+L2') => 0.49
('FLSTM', 'RF', 'LR+L2', 'RBW') => 0.49
('LR+L2', 'RF', 'FLSTM', 'RBW') => 0.49
('RBB', 'RBW', 'FLSTM', 'LR+L2', 'RF') => 0.49
('RF', 'RBB', 'LR+L2', 'FLSTM', 'RBW') => 0.49
('RF', 'LR+L2', 'RBW', 'RBB', 'FLSTM') => 0.49
('RF', 'LR+L2', 'RBB', 'RBW', 'FLSTM') => 0.49
('LR+L2', 'FLSTM', 'RF', 'RBW', 'RBB') => 0.49
('LR+L2', 'FLSTM', 'RF', 'RBB') => 0.49
('LR+L2', 'RBW', 'RBB', 'RF', 'FLSTM') => 0.49
('RBB', 'RF', 'LR+L2', 'FLSTM') => 0.49
('RF', 'RBB', 'RBW', 'FLSTM', 'LR+L2') => 0.49
('FLSTM', 'RF', 'LR+L2') => 0.49
('RF', 'LR+L2', 'FLSTM') => 0.49
('RBW', 'RBB', 'LR+L2', 'RF', 'FLSTM') => 0.49
('LR+L2', 'RBB', 'FLSTM', 'RF') => 0.49
('FLSTM', 'RBW', 'RF', 'LR+L2', 'RBB') => 0.49
('FLSTM', 'RBB', 'LR+L2', 'RF', 'RBW') => 0.49
('RBB', 'LR+L2', 'FLSTM', 'RBW', 'RF') => 0.49
('RBB', 'FLSTM', 'RF', 'LR+L2') => 0.49
('RBW', 'RF', 'FLSTM', 'LR+L2') => 0.49
('RBB', 'LR+L2', 'RBW', 'RF', 'FLSTM') => 0.49
('RBW', 'FLSTM', 'LR+L2', 'RF') => 0.49
('FLSTM', 'RBB', 'RF', 'RBW', 'LR+L2') => 0.49
('LR+L2', 'FLSTM', 'RBB', 'RF') => 0.49
('FLSTM', 'RF', 'RBB', 'LR+L2') => 0.49
('RBB', 'RBW', 'LR+L2', 'RF', 'FLSTM') => 0.49
('LR+L2', 'RF', 'RBB', 'FLSTM', 'RBW') => 0.49
('RBW', 'LR+L2', 'FLSTM', 'RBB', 'RF') => 0.49
('LR+L2', 'RF', 'FLSTM', 'RBB') => 0.49
('RF', 'FLSTM', 'LR+L2') => 0.49
('FLSTM', 'RBW', 'RF', 'LR+L2') => 0.49
('RF', 'LR+L2', 'FLSTM', 'RBW', 'RBB') => 0.49
('FLSTM', 'LR+L2', 'RBW', 'RF') => 0.49
('RF', 'LR+L2', 'RBB', 'FLSTM', 'RBW') => 0.49
('FLSTM', 'RF', 'LR+L2', 'RBB') => 0.49
('RBW', 'RF', 'FLSTM', 'RBB', 'LR+L2') => 0.49
('FLSTM', 'RBW', 'RBB', 'RF', 'LR+L2') => 0.49
('FLSTM', 'LR+L2', 'RBW', 'RF', 'RBB') => 0.49
('RBB', 'LR+L2', 'RF', 'FLSTM') => 0.49
('RBB', 'FLSTM', 'RF', 'LR+L2', 'RBW') => 0.49
('LR+L2', 'RF', 'RBW', 'RBB', 'FLSTM') => 0.49
('LR+L2', 'RF', 'RBB', 'RBW', 'FLSTM') => 0.49
('FLSTM', 'RBW', 'RBB', 'LR+L2', 'RF') => 0.49
('FLSTM', 'RBW', 'LR+L2', 'RBB', 'RF') => 0.49
('RF', 'RBB', 'FLSTM', 'LR+L2') => 0.49
('RF', 'RBW', 'LR+L2', 'FLSTM', 'RBB') => 0.49
('FLSTM', 'LR+L2', 'RF', 'RBB', 'RBW') => 0.49
('RBB', 'RF', 'RBW', 'FLSTM', 'LR+L2') => 0.49
('RBB', 'RF', 'FLSTM', 'LR+L2', 'RBW') => 0.49
('RBB', 'LR+L2', 'RF', 'FLSTM', 'RBW') => 0.49
('RBB', 'LR+L2', 'FLSTM', 'RF', 'RBW') => 0.49
('FLSTM', 'RF', 'RBB', 'RBW', 'LR+L2') => 0.49
('RBW', 'RBB', 'FLSTM', 'LR+L2', 'RF') => 0.49
('RBB', 'LR+L2', 'FLSTM', 'RF') => 0.49
('FLSTM', 'RBW', 'LR+L2', 'RF') => 0.49
('FLSTM', 'LR+L2', 'RBB', 'RF', 'RBW') => 0.49
('LR+L2', 'RBB', 'FLSTM', 'RBW', 'RF') => 0.49
('RF', 'RBB', 'RBW', 'LR+L2', 'FLSTM') => 0.49
('LR+L2', 'RBW', 'FLSTM', 'RF') => 0.49
('RF', 'RBB', 'LR+L2', 'RBW', 'FLSTM') => 0.49
('RBB', 'RF', 'RBW', 'LR+L2', 'FLSTM') => 0.49
('RBB', 'RF', 'LR+L2', 'RBW', 'FLSTM') => 0.49
('LR+L2', 'RBW', 'RF', 'FLSTM', 'RBB') => 0.49
('RBW', 'FLSTM', 'RF', 'RBB', 'LR+L2') => 0.49
('LR+L2', 'RBW', 'FLSTM', 'RF', 'RBB') => 0.49
('LR+L2', 'RBB', 'RBW', 'RF', 'FLSTM') => 0.49
('LR+L2', 'RBW', 'RF', 'FLSTM') => 0.49
('LR+L2', 'FLSTM', 'RF', 'RBW') => 0.49
('RBW', 'LR+L2', 'RF', 'RBB', 'FLSTM') => 0.49
('LR+L2', 'RF', 'RBW', 'FLSTM', 'RBB') => 0.49
('RF', 'FLSTM', 'RBB', 'LR+L2', 'RBW') => 0.49
('RF', 'FLSTM', 'LR+L2', 'RBB', 'RBW') => 0.49
('RF', 'RBW', 'FLSTM', 'RBB', 'LR+L2') => 0.49
('RF', 'FLSTM', 'LR+L2', 'RBW') => 0.49