
We may need to inspect the predicted results to track down errors.

a. What if the predicted response position is greater than the length of the question?

b. What if the predicted response position is less than 1?

In [80]:
import csv

from utils import load_buzz, select, write_result
from features import featurize, get_pos
from containers import Questions, Users, Categories
from nlp import extract_entities

# Sanity check of a prediction file: report every prediction whose absolute
# position lies beyond the number of words in its question, and total the
# overshoot.

# newline='' is the csv-module-recommended way to open a file for csv.reader.
with open('0.72guess.csv', newline='') as csvfile:
    scores = list(csv.reader(csvfile))

# remove header: drop the first row when its id field is not an integer, so
# the int()/float() parsing below (and in later cells that reuse `scores`)
# only ever sees data rows. A file without a header is left untouched.
if scores and scores[0]:
    try:
        int(scores[0][0])
    except ValueError:
        scores = scores[1:]

# Load the dataset once and share it between the question container and the
# test lookup instead of calling load_buzz() twice.
# NOTE(review): assumes Questions() does not mutate the dict it receives — confirm.
buzz = load_buzz()
questions = Questions(buzz)
tests = buzz['test']

diff_sum = 0
print("** tid qid uid: pred_pos, q_length, diff")
for pred_score in scores:
    # Each row is read as (test id, predicted position).
    pred_tid = int(pred_score[0])
    pred_pos = float(pred_score[1])
    qid = tests[pred_tid]['qid']
    uid = tests[pred_tid]['uid']
    if qid not in questions:
        continue

    # Question length is measured in whitespace-separated words.
    # Alternative measure (left disabled): max(questions[qid]['pos_token'].keys())
    q_length = len(questions[qid]['question'].split())

    # Only the magnitude of the prediction is compared against the length.
    # NOTE(review): presumably the sign encodes answer correctness — confirm.
    if abs(pred_pos) > q_length:
        diff = abs(pred_pos) - q_length
        print(pred_tid, qid, uid, ":", pred_pos, ",", q_length, ",", diff)
        diff_sum += diff
print("** diff_tot", diff_sum)

** tid qid uid: pred_pos, q_length, diff
32193 123840 15 : 34.0123999179 , 33 , 1.012399917899998
32186 123840 28 : 35.059582766 , 33 , 2.0595827659999983
32081 123757 1 : 32.5539257933 , 29 , 3.553925793300003
32067 123750 43 : 25.8708545617 , 19 , 6.8708545617
32025 123718 39 : 19.4536636271 , 13 , 6.453663627099999
32018 123718 13 : 20.8354790007 , 13 , 7.835479000700001
31997 123709 52 : 29.2554188639 , 17 , 12.255418863900001
13083 106213 9 : 34.3700554191 , 21 , 13.370055419099998
12908 106060 124 : 38.9119671024 , 37 , 1.9119671023999985
1232 674 161 : 48.2526331817 , 48 , 0.2526331816999985
24990 106373 62 : 29.1956598195 , 23 , 6.195659819500001
** diff_tot 61.7716400533

In [83]:
# Predictions whose value lies strictly between -1 and 1, i.e. whose absolute
# position is below 1. Print how many there are, then display the raw values.
small_preds = [row[1] for row in scores if -1.0 < float(row[1]) < 1.0]
print(len(small_preds))
small_preds


In [ ]: