In [128]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from utils import load_buzz, select, write_result
from features import featurize, get_pos
from containers import Questions, Users, Categories
from nlp import extract_entities
In [ ]:
import pickle

# Load the previously pickled question / user / category tables.
# Each file is opened in a `with` block so the handle is closed as soon as
# the object has been read, instead of being left open until garbage collection.
# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
with open('questions01.pkl', 'rb') as f:
    questions = pickle.load(f)
with open('users01.pkl', 'rb') as f:
    users = pickle.load(f)
with open('categories01.pkl', 'rb') as f:
    categories = pickle.load(f)
In [119]:
from collections import defaultdict
from numpy import sign
def get_cat_pos_per_uid(bd, question_lookup=None):
    """Group buzz positions by user id and lower-cased question category.

    Parameters
    ----------
    bd : mapping
        Iterating it yields keys; each ``bd[key]`` must provide the entries
        ``'uid'``, ``'qid'`` and ``'position'``.
    question_lookup : mapping, optional
        Maps a ``qid`` to a record whose ``'category'`` entry is a string.
        When omitted, the notebook-level ``questions`` table is used, which
        keeps existing one-argument calls working.

    Returns
    -------
    defaultdict
        ``result[uid][category]`` is the list of positions recorded for that
        user in that category, in the iteration order of ``bd``.
    """
    if question_lookup is None:
        # Fall back to the table loaded at notebook level.
        question_lookup = questions

    cat_pos_uid = defaultdict(lambda: defaultdict(list))
    for key in bd:
        record = bd[key]
        # Categories are lower-cased so differently-cased spellings share a bucket.
        cat = question_lookup[record['qid']]['category'].lower()
        cat_pos_uid[record['uid']][cat].append(record['position'])
    return cat_pos_uid
def cal_acc_cat_per_uid(bd):
    """Compute, per user and category, the share of strictly positive positions.

    The positions are grouped with ``get_cat_pos_per_uid(bd)``; for every
    (user, category) bucket the value stored is
    ``(number of positions whose sign is 1) / (number of positions)``.
    Presumably a positive position marks a correct answer -- TODO confirm
    against the data loader.

    Returns a nested ``defaultdict`` (``uid -> category -> float``); looking up
    a missing user or category yields ``0.0`` and inserts that entry.
    """
    positions_by_user = get_cat_pos_per_uid(bd)
    acc_cat_per_uid = defaultdict(lambda: defaultdict(float))
    for uid, per_category in positions_by_user.items():
        for cat, positions in per_category.items():
            signs = sign(positions)
            # Count entries equal to +1; zeros and negatives are not counted.
            n_positive = sum(1 for s in signs if s == 1)
            acc_cat_per_uid[uid][cat] = n_positive / float(len(signs))
    return acc_cat_per_uid
def get_acc_cat_per_uid(bd):
    """Per-user, per-category accuracy with gaps filled from category-level ratios.

    The table is computed from ``bd`` with ``cal_acc_cat_per_uid``. Then, for
    every user present in that table, each category in the notebook-level
    ``categories`` table that the user has no entry for is filled with
    ``categories[cat]['acc_ratio_cat']``.

    Parameters
    ----------
    bd : mapping
        Buzz records, passed straight through to ``cal_acc_cat_per_uid``.

    Returns
    -------
    defaultdict
        ``uid -> category -> float``. Users that do not occur in ``bd`` are
        not filled in; looking them up yields the ``defaultdict`` default of
        ``0.0``.
    """
    # Compute the table from the argument rather than reading a notebook
    # global: the function then works on a fresh kernel, and the freshly built
    # table can be filled in place without touching shared state.
    acc_cat_per_uid = cal_acc_cat_per_uid(bd)
    for per_category in acc_cat_per_uid.values():
        for cat in categories:
            if cat not in per_category:
                per_category[cat] = categories[cat]['acc_ratio_cat']
    return acc_cat_per_uid
In [120]:
# Build the user -> category -> accuracy table from the 'train' entry of the
# buzz data returned by load_buzz().
acc_cat_per_uid = get_acc_cat_per_uid(
    load_buzz()['train']
)
In [124]:
# Copy the per-category accuracies onto each user record under the keys
# 'acc_cat_<category>'.
# NOTE(review): acc_cat_per_uid is a nested defaultdict, so a uid that is in
# `users` but not in the table silently receives 0.0 for every category
# (and the lookup inserts that uid into the table) -- confirm that this is
# intended rather than falling back to the category-level ratio.
for uid in users:
    for cat in categories.keys():
        users[uid]['acc_cat_' + cat] = acc_cat_per_uid[uid][cat]
In [126]:
# Spot-check one user record (key 100) to see the newly added 'acc_cat_*' entries.
users[100]
Out[126]:
In [127]:
import pickle

# Write the updated tables back to '<name>01.pkl'. Only 'users' was modified
# above, so it is the only name listed; add further variable names to the list
# to persist them the same way.
# NOTE: this overwrites the 'users01.pkl' file that the notebook loads from.
for ii in ['users']:
    file_name = ii + '01.pkl'
    with open(file_name, 'wb') as f:
        # pickle.dump returns None, so its result is deliberately not kept.
        # protocol=2 is kept as in the original dump call.
        pickle.dump(globals()[ii], f, protocol=2)