We might need to check the results to figure out where the errors come from.
a. What if the predicted response position is greater than the length of the question?
b. What if the predicted response position is less than 1?
In [80]:
import csv
from utils import load_buzz, select, write_result
from features import featurize, get_pos
from containers import Questions, Users, Categories
from nlp import extract_entities
# Load the predictions file. Each row is read as [test id, predicted position];
# the first row is a header and is skipped.
# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open('0.72guess.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header; the default keeps an empty file from raising.
    next(reader, None)
    scores = list(reader)

# NOTE(review): load_buzz() is called twice, as in the original cell. It could
# be loaded once if Questions() does not modify its argument -- confirm.
questions = Questions(load_buzz())
tests = load_buzz()['test']

# Case (a): report every prediction whose magnitude exceeds the number of
# whitespace-separated words in its question, and total the overshoot.
diff_sum = 0
print("** tid qid uid: pred_pos, q_length, diff")
for pred_score in scores:
    pred_tid = int(pred_score[0])
    pred_pos = float(pred_score[1])
    qid = tests[pred_tid]['qid']
    uid = tests[pred_tid]['uid']
    if qid in questions:
        q_length = len(questions[qid]['question'].split())
        # Alternative length measure that was tried:
        # q_length = max(questions[qid]['pos_token'].keys())
        # abs() is used so that only the magnitude of the prediction is
        # compared with the question length.
        if abs(pred_pos) > q_length:
            diff = abs(pred_pos) - q_length
            print(pred_tid, qid, uid, ":", pred_pos, ",", q_length, ",", diff)
            diff_sum += diff
print("** diff_tot", diff_sum)
In [83]:
# Case (b): predictions whose value lies strictly between -1 and 1.
# Build the list once, print how many there are, then display the raw values
# (still the strings read from the CSV) as the cell output.
near_zero_preds = [row[1] for row in scores if -1.0 < float(row[1]) < 1.0]
print(len(near_zero_preds))
near_zero_preds
Out[83]:
In [ ]: