In [1]:
%matplotlib qt4
from collections import defaultdict, OrderedDict
import numpy as np
import matplotlib.pyplot as plt
from models import PFAExt, EloModel, tools, tests
In [73]:
# Load the answers analysed in this notebook.
# NOTE(review): presumably `limit` is the number of records and `offset` the
# number of leading records to skip -- confirm against tools.load_data.
data = tools.load_data(offset=8000000, limit=150000)
In [178]:
class PFALoggingTiming(PFAExt):
    """PFAExt variant that logs predictions grouped by time since the last practice.

    ``self.timings`` maps the value returned by ``tools.time_diff`` (computed
    from the current answer and the previous practice of the same
    (user, place) item) to a list of ``(answer.is_correct, prediction)``
    tuples. First answers to an item have no previous practice and are not
    logged.
    """

    def __init__(self, *args, **kwargs):
        """Create the timing log and forward all arguments to ``PFAExt``."""
        self.timings = defaultdict(list)
        # The two-argument super() must name *this* class.
        super(PFALoggingTiming, self).__init__(*args, **kwargs)

    def update(self, answer):
        """Predict ``answer``, log it by time gap, then update the item's knowledge.

        :param answer: answer object; this method reads its ``user_id``,
            ``place_id``, ``id``, ``inserted`` and ``is_correct`` attributes.
        """
        item = self.items[answer.user_id, answer.place_id]

        # No practice recorded yet for this (user, place) item: the prior
        # model is updated with the answer before predicting.
        if not item.practices:
            self.prior.update(answer)

        prediction = self.predict(answer)
        self.predictions[answer.id] = prediction

        # Only repeated practices have a "time since previous practice".
        if item.practices:
            timing = tools.time_diff(answer.inserted, item.practices[-1].inserted)
            self.timings[timing].append((answer.is_correct, prediction))

        item.add_practice(answer)

        if answer.is_correct:
            item.inc_knowledge(self.gamma * (1 - prediction))
        else:
            item.inc_knowledge(self.delta * prediction)
class PFALoggingMemoryActivation(PFAExt):
    """PFAExt variant that logs a per-answer "memory increment" by time gap.

    ``self.timings`` maps the value returned by ``tools.time_diff`` (computed
    from the current answer and the previous practice of the same
    (user, place) item) to a list of numbers: the result of
    ``get_memory_inc`` when the prediction missed the observed outcome by
    more than ``self.con``, otherwise ``0``.
    """

    def __init__(self, *args, **kwargs):
        """Set the margin ``con``, create the log and forward arguments to ``PFAExt``."""
        # Margin used both as the logging threshold in ``update`` and to build
        # the target probability in ``get_memory_inc``.
        self.con = 0.05
        self.timings = defaultdict(list)
        super(PFALoggingMemoryActivation, self).__init__(*args, **kwargs)

    def get_memory_inc(self, answer):
        """Return logit(target) minus the item's current knowledge.

        The target probability is ``1 - self.con`` for a correct answer and
        ``self.con`` for an incorrect one.

        NOTE(review): presumably this is the knowledge shift that would bring
        the prediction to the target -- that assumes the prediction is a
        logistic function of ``item.knowledge``; confirm in PFAExt.
        """
        item = self.items[answer.user_id, answer.place_id]
        con = 1 - self.con if answer.is_correct else self.con
        # log(con / (1 - con)) is the same value as log(-con / (con - 1)).
        return np.log(con / (1 - con)) - item.knowledge

    def update(self, answer):
        """Predict ``answer``, log its memory increment, then update knowledge.

        :param answer: answer object; this method reads its ``user_id``,
            ``place_id``, ``id``, ``inserted`` and ``is_correct`` attributes.
        """
        item = self.items[answer.user_id, answer.place_id]

        # No practice recorded yet for this (user, place) item: the prior
        # model is updated with the answer before predicting.
        if not item.practices:
            self.prior.update(answer)

        prediction = self.predict(answer)
        self.predictions[answer.id] = prediction

        # Only repeated practices have a "time since previous practice".
        if item.practices:
            timing = tools.time_diff(answer.inserted, item.practices[-1].inserted)
            # Compare against this answer's own outcome; a bare `is_correct`
            # would silently pick up whatever global of that name happens to
            # exist in the notebook namespace.
            if abs(prediction - answer.is_correct) > self.con:
                self.timings[timing].append(self.get_memory_inc(answer))
            else:
                self.timings[timing].append(0)

        item.add_practice(answer)

        if answer.is_correct:
            item.inc_knowledge(self.gamma * (1 - prediction))
        else:
            item.inc_knowledge(self.delta * prediction)
In [75]:
# Train the timing-logging model on the loaded answers and keep its log.
# A fresh EloModel is passed as the model's only positional argument.
elo_model = EloModel()
pfa_log = PFALoggingTiming(elo_model)
pfa_log.train(data)
timings = pfa_log.timings
In [184]:
# Train the memory-activation logging model with explicit gamma/delta and
# keep its log under a separate name so `timings` stays available.
elo_model = EloModel()
pfa_params = dict(gamma=2.9, delta=-0.7)
pfa_log = PFALoggingMemoryActivation(elo_model, **pfa_params)
pfa_log.train(data)
timings2 = pfa_log.timings
In [112]:
# Bin the logged (time gap, prediction error) pairs separately per outcome.
# Samples are visited in ascending time-gap order; whenever an outcome's
# buffer reaches its bin size, the bin's mean gap -> mean error is stored in
# `curves[outcome]` and the buffer is reset. A trailing partial bin is never
# stored.
curves = defaultdict(OrderedDict)
counters = defaultdict(list)
sizes = {0: 1000, 1: 3000}  # bin size per outcome (0 = incorrect, 1 = correct)
for i, t in enumerate(sorted(timings)):
    if not t < 5:  # gaps below 5 are left out of the curves
        for is_correct, prediction in timings[t]:
            bucket = counters[is_correct]
            bucket.append((t, is_correct - prediction))
            if len(bucket) == sizes[is_correct]:
                gaps, errors = zip(*bucket)
                t_means = np.mean(gaps)
                d_means = np.mean(errors)
                curves[is_correct][t_means] = d_means
                counters[is_correct] = []
In [113]:
# Mean prediction error against mean time gap (log-scaled x axis):
# first series is outcome 1 (correct), second is outcome 0 (incorrect).
correct_curve = curves[1]
incorrect_curve = curves[0]
plt.xscale('log')
plt.plot(correct_curve.keys(), correct_curve.values(), '.-',
         incorrect_curve.keys(), incorrect_curve.values(), '.-')
Out[113]:
In [110]:
# Same binning as the per-outcome curves, but pooling both outcomes into
# bins of 4000 samples. Samples are visited in ascending time-gap order;
# a trailing partial bin is never stored.
curve = OrderedDict()
counter = []
for i, t in enumerate(sorted(timings)):
    if not t < 5:  # gaps below 5 are left out of the curve
        for is_correct, prediction in timings[t]:
            counter.append((t, is_correct - prediction))
            if len(counter) == 4000:
                gaps, errors = zip(*counter)
                t_means = np.mean(gaps)
                d_means = np.mean(errors)
                curve[t_means] = d_means
                counter = []
In [114]:
# Pooled mean prediction error against mean time gap (log-scaled x axis).
gap_means = curve.keys()
error_means = curve.values()
plt.xscale('log')
plt.plot(gap_means, error_means, '.-')
Out[114]:
In [185]:
# Bin the logged memory increments by time gap: mean gap -> mean increment.
#
# Samples are taken in ascending time-gap order (gaps below 5 are left out)
# and cut into consecutive bins of `bin_size2` samples. The final, shorter
# bin is stored exactly once. Testing for "last key" inside the per-sample
# loop would instead flush every sample of the last gap as its own
# one-element bin, each overwriting the same key.
samples2 = [(t, m)
            for t in sorted(timings2) if not t < 5
            for m in timings2[t]]

bin_size2 = 3000
curve2 = OrderedDict()
for start in range(0, len(samples2), bin_size2):
    counter2 = samples2[start:start + bin_size2]  # never empty inside this loop
    t_means = np.mean([ti for ti, _ in counter2])
    m_means = np.mean([mi for _, mi in counter2])
    curve2[t_means] = m_means
In [186]:
# Mean memory increment against mean time gap (log-scaled x axis).
gap_means2 = curve2.keys()
increment_means2 = curve2.values()
plt.xscale('log')
plt.plot(gap_means2, increment_means2, '.-')
Out[186]:
In [68]:
# Breakpoints of a hand-specified curve and the value at each breakpoint,
# turned into a callable by tools.connect_points.
# NOTE(review): the unit constants below assume the breakpoints are time gaps
# in seconds (the original spelled them as 60*30, 60*60*24, ...) -- confirm.
MINUTE = 60
HOUR = 60 * MINUTE
DAY = 24 * HOUR
bins = [0, MINUTE, 90, 150, 5 * MINUTE, 10 * MINUTE,
        30 * MINUTE, 3 * HOUR, DAY, 5 * DAY]
vals = [1.43, 1.17, 1.01, 0.93, 0.82, 0.78, 0.76, 0.63, 0.42, 0.12]
linear_fit = tools.connect_points(zip(bins, vals))
In [69]:
# Sample points at which to evaluate `linear_fit`: 500 integers drawn from
# 0..60*60*24*5 plus 500 from 0..60*60 (both ends inclusive), so the short
# range is sampled more densely.
#
# A locally seeded generator keeps the sample (and the figure built from it)
# reproducible without touching NumPy's global random state. `randint` has an
# exclusive upper bound, hence the `+ 1` to keep the inclusive range of the
# deprecated `random_integers`.
_rng = np.random.RandomState(0)
X = (list(_rng.randint(0, 60*60*24*5 + 1, 500)) +
     list(_rng.randint(0, 60*60 + 1, 500)))
In [70]:
# Check the piecewise fit visually: dots are the fit evaluated at the sample
# points, the 'o-' line is the curve through the original breakpoints.
fitted = [linear_fit(x) for x in X]
plt.plot(X, fitted, '.', bins, vals, 'o-')
plt.xscale('log')