error01_pred_pos_gt_q_length

We need to inspect the prediction results to figure out where the errors come from.

a. What if the predicted response position is greater than the length of the question?

b. What if the predicted response position is less than 1?


In [80]:
import csv

from utils import load_buzz, select, write_result
from features import featurize, get_pos
from containers import Questions, Users, Categories
from nlp import extract_entities


# Compare each predicted buzz position against the length (in whitespace-
# separated words) of the corresponding question and report every prediction
# whose magnitude lies beyond the end of the question.

# newline='' is the csv module's documented way to open a file for reading,
# so that line breaks embedded in quoted fields are handled correctly.
with open('0.72guess.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # remove header (no exception if the file happens to be empty)
    next(reader, None)
    # Blank lines come back from csv.reader as empty lists; drop them so the
    # row[0] / row[1] lookups below (and in later cells) cannot fail on them.
    scores = [row for row in reader if row]

# Load the buzz data a single time and share it between the question
# container and the test-set lookup instead of loading it twice.
buzz = load_buzz()
questions = Questions(buzz)
tests = buzz['test']

diff_sum = 0
print("** tid qid uid: pred_pos, q_length, diff")
for pred_score in scores:
    pred_tid = int(pred_score[0])
    pred_pos = float(pred_score[1])
    qid = tests[pred_tid]['qid']
    uid = tests[pred_tid]['uid']
    if qid not in questions:
        # No question text available for this test row, so nothing to compare.
        continue
    q_length = len(questions[qid]['question'].split())
    # Alternative length measure that was tried here:
    #q_length = max(questions[qid]['pos_token'].keys())
    # The sign of the prediction is ignored; only its magnitude is compared
    # with the question length.
    if abs(pred_pos) > q_length:
        diff = abs(pred_pos) - q_length
        print(pred_tid, qid, uid, ":", pred_pos, ",", q_length, ",", diff)
        diff_sum += diff

print("** diff_tot", diff_sum)


** tid qid uid: pred_pos, q_length, diff
32193 123840 15 : 34.0123999179 , 33 , 1.012399917899998
32186 123840 28 : 35.059582766 , 33 , 2.0595827659999983
32081 123757 1 : 32.5539257933 , 29 , 3.553925793300003
32067 123750 43 : 25.8708545617 , 19 , 6.8708545617
32025 123718 39 : 19.4536636271 , 13 , 6.453663627099999
32018 123718 13 : 20.8354790007 , 13 , 7.835479000700001
31997 123709 52 : 29.2554188639 , 17 , 12.255418863900001
13083 106213 9 : 34.3700554191 , 21 , 13.370055419099998
12908 106060 124 : 38.9119671024 , 37 , 1.9119671023999985
1232 674 161 : 48.2526331817 , 48 , 0.2526331816999985
24990 106373 62 : 29.1956598195 , 23 , 6.195659819500001
** diff_tot 61.7716400533

In [83]:
# Raw prediction strings whose numeric value lies strictly between -1 and 1.
# The filter is evaluated once and reused for both the count and the display.
near_zero = [row[1] for row in scores if -1.0 < float(row[1]) < 1.0]
print(len(near_zero))
near_zero


37
Out[83]:
['0.411762859547',
 '0.548684527721',
 '0.417215378227',
 '0.885190165274',
 '0.903842481014',
 '-0.818594323963',
 '-0.985039708421',
 '-0.605746623982',
 '0.862697590958',
 '-0.273778275696',
 '-0.44766898547',
 '0.858614576988',
 '0.442675359599',
 '0.840791525727',
 '0.480208332493',
 '0.0439538513882',
 '0.633145583629',
 '0.388121442757',
 '-0.391576058921',
 '0.122966891992',
 '0.567030390322',
 '-0.491910606052',
 '0.570451919832',
 '-0.48926593946',
 '-0.692053780514',
 '0.231540027305',
 '-0.48255797431',
 '-0.92540770738',
 '-0.466494448178',
 '0.547971332207',
 '-0.851434916452',
 '0.111587793014',
 '-0.459855797295',
 '-0.357544285217',
 '0.0217785919356',
 '0.488019397626',
 '0.360854671841']

In [ ]: