In [ ]:
from __future__ import division
import pickle
import os
from sklearn import metrics
import numpy as np
import pandas as pd
from lentil import evaluate
from lentil import models
import mem
In [ ]:
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
In [ ]:
import matplotlib as mpl
mpl.rc('savefig', dpi=300)
mpl.rc('text', usetex=True)
mpl.rc('text.latex', preamble='\usepackage{amsfonts}')
In [ ]:
import matplotlib.lines as mlines
In [ ]:
import logging
logging.getLogger().setLevel(logging.DEBUG)
In [ ]:
with open(os.path.join('data', 'dutch_big_history.pkl'), 'rb') as f:
history = pickle.load(f)
Build content features for mnemosyne_v2
In [ ]:
with open(os.path.join('data', 'content_features.pkl'), 'rb') as f:
contents_of_item_id = pickle.load(f)
In [ ]:
content_features = {k: (len(f) if f is not None else len(b)) for k, (b, f) in contents_of_item_id.iteritems()}
Build content features for dutch_big
In [ ]:
with open(os.path.join('data', 'original_of_module_id.pkl'), 'rb') as f:
original_of_module_id = pickle.load(f)
In [ ]:
embedding_of_word = {}
with open(os.path.join('data', 'embeddings', 'cbow', 'size=50.embeddings'), 'rb') as f:
for line in f:
fields = line.strip().split(' ')
embedding_of_word[fields[0]] = np.array([float(x) for x in fields[1:]])
In [ ]:
count_of_word = {}
with open(os.path.join('data', 'embeddings', 'cbow', 'unigram_frequencies'), 'rb') as f:
for line in f:
fields = line.strip().split(' ')
count_of_word[fields[0]] = int(fields[1])
total_count = sum(count_of_word.itervalues())
freq_of_word = {k: (v / total_count) for k, v in count_of_word.iteritems()}
In [ ]:
content_features = {k: np.append(
embedding_of_word[original_of_module_id[k]],
[len(original_of_module_id[k]), freq_of_word[original_of_module_id[k]]]) \
for k in history.data['module_id'].unique()}
In [ ]:
content_features = {k: np.array(
[len(original_of_module_id[k]), freq_of_word[original_of_module_id[k]]]) \
for k in history.data['module_id'].unique()}
In [ ]:
content_features = {k: np.array(
[len(original_of_module_id[k])]) \
for k in history.data['module_id'].unique()}
Setup the IRT benchmark models and memory models
In [ ]:
def build_1pl_irt_model(history, filtered_history, split_history=None):
model = models.OneParameterLogisticModel(
filtered_history, select_regularization_constant=True, name_of_user_id='user_id')
model.fit()
return model
def build_2pl_irt_model(history, filtered_history, split_history=None):
model = models.TwoParameterLogisticModel(
filtered_history, select_regularization_constant=True, name_of_user_id='user_id')
model.fit()
return model
def build_student_biased_coin_model(history, filtered_history, split_history=None):
model = models.StudentBiasedCoinModel(history, filtered_history, name_of_user_id='user_id')
model.fit()
return model
def build_assessment_biased_coin_model(history, filtered_history, split_history=None):
model = models.AssessmentBiasedCoinModel(history, filtered_history)
model.fit()
return model
def meta_build_efc_model(
strength_model='deck', using_delay=True,
using_global_difficulty=True, debug_mode_on=True, content_features=None,
coeffs_regularization_constant=1e-6, item_bias_regularization_constant=1e-6,
using_item_bias=True):
def build_efc_model(history, filtered_history, split_history=None):
model = mem.EFCModel(
filtered_history, strength_model=strength_model, using_delay=using_delay,
using_global_difficulty=using_global_difficulty, debug_mode_on=debug_mode_on,
content_features=content_features, using_item_bias=using_item_bias)
model.fit(
learning_rate=0.1,
#learning_rate=(1 if not using_global_difficulty else 0.1),
ftol=1e-6, max_iter=10000,
coeffs_regularization_constant=coeffs_regularization_constant,
item_bias_regularization_constant=item_bias_regularization_constant)
return model
return build_efc_model
def meta_build_logistic_regression_model(C=1.0):
def build_logistic_regression_model(history, filtered_history, split_history=None):
model = mem.LogisticRegressionModel(filtered_history)
model.fit(C=C)
return model
return build_logistic_regression_model
In [ ]:
model_builders = {
'1PL IRT' : build_1pl_irt_model,
'EFC I/-/-' : meta_build_efc_model(
strength_model='deck', using_delay=True, using_global_difficulty=False,
content_features=None, using_item_bias=True,
item_bias_regularization_constant=1e-3),
'EFC I/G/-' : meta_build_efc_model(
strength_model='deck', using_delay=True, using_global_difficulty=True,
content_features=None, using_item_bias=True,
item_bias_regularization_constant=1e-3, coeffs_regularization_constant=1e-3),
'EFC I/G/B' : meta_build_efc_model(
strength_model='deck', using_delay=True, using_global_difficulty=True,
content_features=content_features, using_item_bias=True,
item_bias_regularization_constant=1e-3, coeffs_regularization_constant=1e-3),
'EFC -/G/B' : meta_build_efc_model(
strength_model='deck', using_delay=True, using_global_difficulty=True,
content_features=content_features, using_item_bias=False,
coeffs_regularization_constant=1e-3),
'EFC -/-/B' : meta_build_efc_model(
strength_model='deck', using_delay=True, using_global_difficulty=False,
content_features=content_features, using_item_bias=False,
coeffs_regularization_constant=1e-3)
}
In [ ]:
print "Number of models = %d" % (len(model_builders))
print '\n'.join(model_builders.keys())
Perform the evaluations
In [ ]:
results = evaluate.cross_validated_auc(
model_builders,
history,
num_folds=10,
random_truncations=True)
In [ ]:
# dump results to file
with open(os.path.join('results', 'dutch_big_lesion_analysis.pkl'), 'wb') as f:
pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
In [ ]:
# load results from file, replacing current results
with open(os.path.join('results', 'dutch_big_lesion_analysis.pkl'), 'rb') as f:
results = pickle.load(f)
In [ ]:
df = history.data
Compute validation AUCs for separate bins of data
In [ ]:
def compute_auc(y_trues, probas_pred):
try:
y_trues, probas_pred = zip(*[(y, p) for y, p in zip(y_trues, probas_pred) if not np.isnan(p)])
fpr, tpr, thresholds = metrics.roc_curve(y_trues, probas_pred, pos_label=1)
return metrics.auc(fpr, tpr)
except:
return np.nan
In [ ]:
ndata_in_logs = [df['module_id'].ix[idxes].value_counts() for idxes, y_trues, probas_pred in results.train_ixn_data]
ndata_of_val_ixns = [df['module_id'].ix[idxes].apply(lambda x: vc.get(x, 0)) for vc, (idxes, y_trues, probas_pred) in zip(ndata_in_logs, results.val_ixn_data)]
In [ ]:
num_bins = 5
hist, bin_edges = np.histogram([y for x in ndata_of_val_ixns for y in x], bins=num_bins)
t = [(x+y)/2 for x, y in zip(bin_edges[:-1], bin_edges[1:])]
In [ ]:
model_names = [
'1PL IRT',
'EFC I/-/-',
'EFC I/G/-',
'EFC I/G/B',
'EFC -/G/B',
'EFC -/-/B']
model_labels = [
'1PL IRT',
r'$\gamma_i$',
r'$\gamma_i + \beta_0$',
r'$\gamma_i + \beta_0 + \vec{\beta}_{1:n} \cdot \vec{x}_i$',
r'$\beta_0 + \vec{\beta}_{1:n} \cdot \vec{x}_i$',
r'$\vec{\beta}_{1:n} \cdot \vec{x}_i$']
In [ ]:
plt.xlabel(r'$\log{(\theta_i)}$')
plt.boxplot([results.validation_aucs(m) for m in model_names])
plt.scatter(
range(1, len(model_names) + 1),
[results.test_auc(m) for m in model_names],
color='orange', s=100)
plt.xticks(
range(1, len(model_names) + 1),
model_labels, rotation=15)
plt.xlim([0.5, len(model_names) + .5])
orange_circle = mlines.Line2D([], [], color='orange', marker='o', label='Test')
red_line = mlines.Line2D([], [], color='red', marker='_', label='Validation')
plt.legend(handles=[red_line, orange_circle], loc='best')
plt.ylabel('AUC')
plt.savefig(os.path.join('figures', 'dutch_big', 'auc-box-plots-efc-cgi.pdf'), bbox_inches='tight')
plt.show()
In [ ]:
label_of_m = dict(zip(model_names, model_labels))
In [ ]:
s_of_model = {}
for m in model_names:
s_of_model[m] = [[compute_auc(
[p for p, q in zip(y_trues, vf) if q>=x and (q<y or (bidx==len(bin_edges)-2 and q==y))],
[p for p, q in zip(probas_pred[m], vf) if q>=x and (q<y or (bidx==len(bin_edges)-2 and q==y))]) \
for (_, y_trues, probas_pred), vf in zip(results.val_ixn_data, ndata_of_val_ixns)] \
for bidx, (x, y) in enumerate(zip(bin_edges[:-1], bin_edges[1:]))]
In [ ]:
fig, ax1 = plt.subplots()
sns.set_style('dark')
ax2 = ax1.twinx()
ax2.bar(bin_edges[:-1], hist, [y-x for x, y in zip(bin_edges[:-1], bin_edges[1:])], color='gray', alpha=0.5, linewidth=0)
ax2.set_ylabel('Frequency (number of interactions)')
sns.set_style('darkgrid')
lines = []
for m, s1 in s_of_model.iteritems():
l1 = ax1.errorbar(
t, [np.nanmean(z) for z in s1], label='%s' % label_of_m[m],
yerr=[np.nanstd(z)/np.sqrt(len(z)) for z in s1])
lines.append(l1)
ax1.set_xlabel('Number of training logs for item')
ax1.set_ylabel('Validation AUC')
first_legend = plt.legend(handles=lines[:3], loc='lower center', bbox_to_anchor=(0.25, -0.4))
plt.gca().add_artist(first_legend)
plt.legend(handles=lines[3:], loc='lower center', bbox_to_anchor=(0.75, -0.4))
plt.savefig(os.path.join('figures', 'dutch_big', 'auc-vs-ndata.pdf'), bbox_inches='tight')
plt.show()
In [ ]: