In [ ]:
from __future__ import division
from collections import defaultdict
import json
import pickle
import os
import pandas as pd
import numpy as np
from scipy import misc
In [ ]:
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
In [ ]:
import matplotlib as mpl
# Global figure settings for all plots in this notebook.
mpl.rc('savefig', dpi=200)  # high-resolution figure export
mpl.rc('text', usetex=True)  # render labels with LaTeX (requires a TeX installation)
Load the log data from the Mechanical Turk experiments
In [ ]:
# Load the Mechanical Turk experiment logs: one JSON record per line.
with open(os.path.join('data', 'dataset_leitner.dump'), 'rb') as f:
    data = [json.loads(line) for line in f]
In [ ]:
num_decks = 5  # number of decks in the Leitner system used by the experiment
session_duration = 15 * 60 # 15 minutes, expressed in seconds
In [ ]:
std_err = lambda x: np.nanstd(x) / np.sqrt(len(x))
In [ ]:
df = pd.DataFrame(data)
In [ ]:
df.sort('card_time', inplace=True) # sort in chronological order
In [ ]:
df
Do some pre-processing
In [ ]:
df = df[df['worker_id'].apply(lambda x: 'IGOR' not in x)] # filter out artifacts from platform debugging
In [ ]:
df['user_id'] = df['worker_id'] # a 'user' can have multiple sessions
df['worker_id'] = df['worker_id'] + df['vocab'] # a 'worker' exists for a single session (platform worker id + vocab list)
In [ ]:
# deck = num_decks corresponds to a new item (i.e., deck = 0) for sessions other than those for vocab.list.japanese.0
# don't need to take the deck column too seriously, since we re-compute decks for important analysis
is_relabeled_new_item = (df['deck'] == num_decks) & (df['vocab'] != 'vocab.list.japanese.0')
df.loc[is_relabeled_new_item, 'deck'] = 0
df['deck'] = df['deck'] + 1 # shift decks from [0, num_decks-1] to [1, num_decks]
In [ ]:
# For each interaction, record how many times this (worker, item) pair was
# reviewed before, and the elapsed time since its previous review.
# Relies on df already being in chronological order.
nreps = []
delay = []
reps_so_far = defaultdict(int)
last_review_time = defaultdict(lambda: np.nan)  # NaN delay for first reviews
for _, ixn in df.iterrows():
    key = ixn['worker_id'] + '-' + ixn['foreign']
    now = ixn['card_time']
    nreps.append(reps_so_far[key])
    delay.append(now - last_review_time[key])
    reps_so_far[key] += 1
    last_review_time[key] = now
In [ ]:
df['nreps'] = nreps # number of repetitions for user-item pair
df['delay'] = delay # time elapsed (milliseconds) since previous review for user-item pair (NaN for a first review)
In [ ]:
df['outcome'] = df['score'].apply(lambda x: 0 if x<=2 else 1) # discretize scores into binary outcomes
In [ ]:
# extract assigned arrival rate for each interaction
# Priority: an explicit 'rate' value; else the last entry of the 'probs' list
# (when present); else NaN.
arrival_rate = []
for _, row in df.iterrows():
    if not np.isnan(row['rate']):
        ar = row['rate']
    elif isinstance(row['probs'], list):  # idiomatic type check (was: type(...) == list)
        ar = row['probs'][-1]
    else:
        ar = np.nan
    arrival_rate.append(ar)
df['arrival_rate'] = arrival_rate
In [ ]:
# how much data is there for each experimental condition?
# (each distinct arrival rate is one condition)
df['arrival_rate'].value_counts()
Compute basic stats summarizing the data
In [ ]:
# Per-session summary statistics (a post-processed 'worker_id' identifies one session).
num_items_per_session = []
recall_rates = []
session_lengths = []
for _, session in df.groupby('worker_id'):
    num_items_per_session.append(len(session['foreign'].unique()))
    recall_rates.append(session['outcome'].mean())
    session_lengths.append(len(session))
In [ ]:
print "Number of interactions = %d" % len(df)
print "Number of users = %d" % len(df['user_id'].unique())
print "Number of items = %d" % len(df['foreign'].unique())
print "Number of sessions = %d" % len(df['worker_id'].unique())
print "Overall recall rate = %0.3f" % np.mean(df['outcome'])
print "Average number of interactions in session = %0.3f" % np.mean(session_lengths)
In [ ]:
np.mean(num_items_per_session)
In [ ]:
# Histogram of unique items seen per session.
plt.hist(num_items_per_session)
plt.xlabel('Number of Unique Items Seen During Session')
plt.ylabel('Frequency (Number of Sessions)')
fig_path = os.path.join('figures', 'mturk', 'num-unique-items-seen-per-session.pdf')
plt.savefig(fig_path)
plt.show()
Our estimate of the empirical review frequency budget $U$ is as follows
In [ ]:
np.mean(np.array(session_lengths) / session_duration)
In [ ]:
# Session lengths are heavy-tailed, so histogram them on a log10 scale
# (the +1 avoids log10(0) for empty sessions).
log_lengths = np.log10(np.array(session_lengths) + 1)
plt.hist(log_lengths)
plt.xlabel('log10(Number of Interactions In Session)')
plt.ylabel('Frequency (Number of Sessions)')
plt.savefig(os.path.join('figures', 'mturk', 'num-ixns-per-session.pdf'))
plt.show()
In [ ]:
# Sessions per user: each distinct vocab list a user studied counts as one session.
num_sessions_per_person = [
    len(user_ixns['vocab'].unique())
    for _, user_ixns in df.groupby('user_id')
]
In [ ]:
np.mean(num_sessions_per_person)
In [ ]:
# Histogram of how many sessions each user completed.
plt.xlabel('Number of Sessions')
plt.ylabel('Frequency (Number of Users)')
plt.hist(num_sessions_per_person)
plt.savefig(os.path.join('figures', 'mturk', 'num-sessions-per-person.pdf'))
plt.show()
In [ ]:
# Collect outcome samples per (1-indexed) deck, skipping rows with a missing
# deck and any deck index beyond num_decks.
decks = range(1, 1 + num_decks)
outcomes = [None] * num_decks
has_deck = df[df['deck'].notnull()]
for deck, ixns in has_deck.groupby('deck'):
    if deck <= num_decks:
        outcomes[int(deck) - 1] = ixns['outcome'].values
In [ ]:
# Empirical recall rate as a function of deck position, with standard-error bars.
plt.xlabel('Deck')
plt.ylabel('Empirical Recall Rate')
plt.errorbar(decks, [np.nanmean(x) for x in outcomes], yerr=[std_err(x) for x in outcomes])
plt.xticks(decks)
plt.savefig(os.path.join('figures', 'mturk', 'recall-rate-vs-deck.pdf'))
plt.show()
In [ ]:
# Outcome samples grouped by repetition count (0 = first exposure).
nreps = range(max(df['nreps']) + 1)
outcomes = [df.loc[df['nreps'] == r, 'outcome'].values for r in nreps]
In [ ]:
# Empirical recall rate vs. number of prior repetitions (log-scaled x axis).
plt.xlabel('Number of repetitions')
plt.ylabel('Empirical Recall Rate')
plt.errorbar(nreps, [np.nanmean(x) for x in outcomes], yerr=[std_err(x) for x in outcomes])
plt.xscale('log')
plt.savefig(os.path.join('figures', 'mturk', 'recall-rate-vs-nreps.pdf'))
plt.show()
In [ ]:
# Bin outcomes by log10(1 + delay). The original per-row apply recomputed the
# log10 transform for every bin; hoist it once and use vectorized masks.
delay_ticks = np.arange(0, 6.5, 0.1)
recall_rates = []
log_delay = np.log10(1 + df['delay'])  # NaN delays stay NaN and fail every comparison
positive_delay = df['delay'] > 0
for lo, hi in zip(delay_ticks[:-1], delay_ticks[1:]):
    in_bin = positive_delay & (log_delay >= lo) & (log_delay < hi)
    recall_rates.append(df.loc[in_bin, 'outcome'].values)
In [ ]:
# Empirical recall rate per log-delay bin.
plt.xlabel('log10(Delay) (log10-milliseconds)')
plt.ylabel('Empirical Recall Rate')
bin_centers = [(lo + hi) / 2 for lo, hi in zip(delay_ticks[:-1], delay_ticks[1:])]
# np.nanmean for consistency with the other recall-rate plots (np.mean yields
# nan with a warning on empty bins; nanmean handles them the same way).
plt.errorbar(bin_centers, [np.nanmean(x) for x in recall_rates], yerr=[std_err(x) for x in recall_rates])
plt.savefig(os.path.join('figures', 'mturk', 'recall-rate-vs-delay.pdf'))
plt.show()
In [ ]:
# Compare the distribution of log-delays for forgotten vs. recalled reviews.
plt.xlabel('log10(Delay) (log10-milliseconds)')
plt.ylabel('Normalized Frequency (Fraction of Total Interactions)')
# forgotten reviews: drop NaN delays (first reviews) and non-positive delays
x = np.array(df[df['outcome']==0]['delay'].values)
x = x[(~np.isnan(x)) & (x>0)]
# NOTE(review): hist's `normed` kwarg is deprecated/removed in newer
# matplotlib (replaced by `density`) -- confirm the pinned version before upgrading
plt.hist(np.log10(1+x), alpha=0.5, label='forgotten', normed=True, linewidth=0)#, bins=20)
# recalled reviews, same filtering
x = np.array(df[df['outcome']==1]['delay'].values)
x = x[(~np.isnan(x)) & (x>0)]
plt.hist(np.log10(1+x), alpha=0.5, label='recalled', normed=True, linewidth=0)#, bins=20)
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'delays-cond-outcomes.pdf'))
plt.show()
In [ ]:
# Per-user "false positive rate" over the failing scores (0-2), with add-one
# (Laplace) smoothing: fpr = 1 - (1 + #score0) / (2 + #scores in {0,1,2}).
# Presumably score 0 means the user also predicted the failure, while scores
# 1-2 are overconfident misses -- TODO confirm against the scoring UI.
fpr = []
for _, group in df.groupby('user_id'):
    vc = group['score'].value_counts()
    fpr.append(1 - ((1 + vc.get(0, 0)) / (2 + vc.get(0, 0) + vc.get(1, 0) + vc.get(2, 0))))
In [ ]:
plt.xlabel("False Positive Rate")
plt.ylabel('Frequency (Number of Users)')
plt.title('Know Thyself, Turker!')
plt.hist(fpr, bins=20)
plt.savefig(os.path.join('figures', 'mturk', 'know-thyself-fpr.pdf'))
plt.show()
In [ ]:
# Distribution of mean recall rate per session.
plt.xlabel('Recall Rate')
plt.ylabel('Frequency (Number of Sessions)')
plt.hist(df.groupby('worker_id')['outcome'].mean().values)#, bins=20)
plt.savefig(os.path.join('figures', 'mturk', 'user-recall-rates.pdf'))
plt.show()
In [ ]:
df.groupby('foreign')['outcome'].mean().sort(inplace=False)
In [ ]:
# Distribution of mean recall rate per item.
item_recall_values = df.groupby('foreign')['outcome'].mean().values
plt.hist(item_recall_values)#, bins=20)
plt.xlabel('Recall Rate')
plt.ylabel('Frequency (Number of Items)')
plt.savefig(os.path.join('figures', 'mturk', 'item-recall-rates.pdf'))
plt.show()
In [ ]:
df_fit = df[~np.isnan(df['delay'])]
In [ ]:
# Arrays for the forgetting-model fit below.
delays = np.array(df_fit['delay'].values) / 1000 # seconds
decks = np.array(df_fit['deck'])  # deck at review time (1-indexed)
nreps = np.array(df_fit['nreps']) + 1  # shifted so counts start at 1
outcomes = np.array(df_fit['outcome'])  # binary recall outcomes
In [ ]:
thetas = np.arange(0.0072, 0.0082, 0.00001)
In [ ]:
len(thetas)
In [ ]:
# Log-likelihood of the data for each candidate theta under an exponential
# forgetting model: P(recall) = exp(-theta * delay / deck).
lls = []
for theta in thetas:
    log_p_recall = -theta * delays / decks
    log_p_forget = np.log(1 - np.exp(log_p_recall))
    # The arithmetic blend (not np.where) is deliberate: 0 * (-inf) terms
    # become nan/inf and are then dropped by the isfinite filter, exactly as
    # in the original computation.
    per_ixn_ll = outcomes * log_p_recall + (1 - outcomes) * log_p_forget
    lls.append(np.nansum(per_ixn_ll[np.isfinite(per_ixn_ll)]))
In [ ]:
lls = np.array(lls)
# scipy.misc.logsumexp was moved to scipy.special (0.19) and removed from
# scipy.misc in 1.0; prefer the new location, fall back for old SciPy.
try:
    from scipy.special import logsumexp
except ImportError:
    from scipy.misc import logsumexp
# log of the marginal likelihood over the theta grid (uniform prior implied)
marginal_lik = logsumexp(lls)
In [ ]:
posteriors = np.exp(lls - marginal_lik)
In [ ]:
# Posterior over the global item-difficulty parameter.
plt.xlabel(r'Item Difficulty $\theta$')
plt.ylabel(r'Posterior Probability $P(\theta \mid D)$')
plt.plot(thetas, posteriors)
plt.savefig(os.path.join('figures', 'mturk', 'item-difficulty-posterior.pdf'))
plt.show()
Our maximum-likelihood estimate of the global item difficulty $\theta$ is as follows
In [ ]:
thetas[max(range(len(thetas)), key=lambda x: lls[x])]
Examine phase transition
In [ ]:
# For each session, recover the assigned arrival rate and re-compute the final
# Leitner deck distribution from the outcome sequence.
arrival_rate = []
final_deck_distrn = []
num_mastered = []
for worker_id, group in df.groupby('worker_id'):
    try:
        # handle weird rounding issues; int() raises ValueError on a NaN
        # arrival rate, which skips the session
        vx = int(100 * group['arrival_rate'].values[-1]) / 100
    except ValueError:
        # was a bare `except: pass` over the whole body; only the NaN -> int
        # conversion is expected to fail, so the except is narrowed here
        continue
    # re-compute the 'deck' column: a correct answer promotes the item one
    # deck, an incorrect answer demotes it (floored at deck 0)
    deck_of_item = {k: 0 for k in group['foreign'].unique()}
    for _, ixn in group.iterrows():
        item = ixn['foreign']
        outcome = ixn['outcome']
        if outcome == 1:
            deck_of_item[item] += 1
        elif outcome == 0 and deck_of_item[item] > 0:
            deck_of_item[item] -= 1
    # invert to deck -> set of items; decks past the last are lumped together
    # as "mastered" (index num_decks)
    items_of_deck = defaultdict(set)
    for item, deck in deck_of_item.items():
        items_of_deck[min(deck, num_decks)].add(item)
    vy = [len(items_of_deck[d]) for d in range(num_decks + 1)]
    arrival_rate.append(vx)
    final_deck_distrn.append(vy)
    num_mastered.append(vy[-1])
In [ ]:
# Bucket the per-session results by their arrival-rate condition.
unique_arrival_rates = sorted(set(arrival_rate))
num_mastered_of_arrival_rate = {rate: [] for rate in unique_arrival_rates}
final_deck_distrn_of_arrival_rate = {rate: [] for rate in unique_arrival_rates}
for rate, mastered, distrn in zip(arrival_rate, num_mastered, final_deck_distrn):
    num_mastered_of_arrival_rate[rate].append(mastered)
    final_deck_distrn_of_arrival_rate[rate].append(distrn)
In [ ]:
unique_arrival_rates
In [ ]:
# scale arrival rates from probabilities to proper 'rates' (i.e., having units 'items per second')
# NOTE(review): uses the mean session length as the per-session interaction
# count -- an average-case conversion; confirm this matches the paper's definition
scaled_unique_arrival_rates = np.array(unique_arrival_rates) * np.mean(session_lengths) / session_duration
In [ ]:
scaled_unique_arrival_rates
In [ ]:
# Load precomputed simulation results and the theoretical threshold.
# NOTE: pickle is only safe on trusted, locally produced files.
with open(os.path.join('results', 'theoretical-vs-simulated-phase-transition.pkl'), 'rb') as f:
    simulated_arrival_rates, simulated_throughputs, theoretical_phase_transition_threshold = pickle.load(f)
In [ ]:
# Empirical vs. simulated throughput as a function of arrival rate; the last
# (largest) arrival-rate condition is dropped, as in the other plots below.
plt.xlabel(r'Arrival Rate $\lambda_{ext}$ (Items Per Second)')
plt.ylabel(r'Throughput $\lambda_{out}$ (Items Per Second)')
empirical_throughputs = [
    np.array(num_mastered_of_arrival_rate[rate]) / (15 * 60)
    for rate in unique_arrival_rates[:-1]
]
plt.errorbar(
    scaled_unique_arrival_rates[:-1],
    [np.mean(t) for t in empirical_throughputs],
    yerr=[std_err(t) for t in empirical_throughputs],
    label='Empirical', color='orange')
plt.errorbar(simulated_arrival_rates, [np.mean(y) for y in simulated_throughputs], yerr=[std_err(y) for y in simulated_throughputs], label='Simulated (Clocked Delay)', color='green')
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'empirical-and-simulated-throughput-vs-arrival-rate.pdf'))
plt.show()
In [ ]:
# Same throughput plot as above, with the theoretical phase-transition
# threshold overlaid as a vertical line.
plt.xlabel(r'Arrival Rate $\lambda_{ext}$ (Items Per Second)')
plt.ylabel(r'Throughput $\lambda_{out}$ (Items Per Second)')
empirical_throughputs = [
    np.array(num_mastered_of_arrival_rate[rate]) / (15 * 60)
    for rate in unique_arrival_rates[:-1]
]
plt.errorbar(
    scaled_unique_arrival_rates[:-1],
    [np.mean(t) for t in empirical_throughputs],
    yerr=[std_err(t) for t in empirical_throughputs],
    label='Empirical', color='orange')
plt.errorbar(simulated_arrival_rates, [np.mean(y) for y in simulated_throughputs], yerr=[std_err(y) for y in simulated_throughputs], label='Simulated (Clocked Delay)', color='green')
plt.axvline(x=theoretical_phase_transition_threshold, label='Phase Transition Threshold (Theoretical)', linestyle='--')
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'empirical-and-simulated-and-theoretical-throughput-vs-arrival-rate.pdf'))
plt.show()
In [ ]:
# Final number of items per deck (plus 'mastered') vs. arrival rate; the last
# arrival-rate condition is dropped, as in the throughput plots.
plt.xlabel('Arrival Rate $\lambda_{ext}$ (Items Per Second)')
plt.ylabel('Number of Items')
# deck_distrns[j][i]: for deck j and arrival-rate index i, one item count per
# session in that condition
deck_distrns = [[[] for _ in unique_arrival_rates[:-1]] for _ in xrange(num_decks + 1)]
for i, x in enumerate(unique_arrival_rates[:-1]):
    for deck_distrn in final_deck_distrn_of_arrival_rate[x]:
        y = np.array(deck_distrn, dtype=float)
        for j, z in enumerate(y):
            deck_distrns[j][i].append(z)
for i, dd in enumerate(deck_distrns):
    label = 'Deck %d' % (i+1)
    if i == num_decks:
        label = 'Mastered'  # the slot past the last deck holds mastered items
    plt.errorbar(scaled_unique_arrival_rates[:-1], [np.mean(x) for x in dd], yerr=[std_err(x) for x in dd], label=label)
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'num-items-vs-arrival-rate-cond-deck.pdf'))
plt.show()
In [ ]:
# Same as the previous plot, but each session's deck distribution is
# normalized to fractions (y /= y.sum()) before averaging.
plt.xlabel('Arrival Rate $\lambda_{ext}$ (Items Per Second)')
plt.ylabel('Fraction of Items Seen During Session')
deck_distrns = [[[] for _ in unique_arrival_rates[:-1]] for _ in xrange(num_decks + 1)]
for i, x in enumerate(unique_arrival_rates[:-1]):
    for deck_distrn in final_deck_distrn_of_arrival_rate[x]:
        y = np.array(deck_distrn, dtype=float)
        y /= y.sum()
        for j, z in enumerate(y):
            deck_distrns[j][i].append(z)
for i, dd in enumerate(deck_distrns):
    label = 'Deck %d' % (i+1)
    if i == num_decks:
        label = 'Mastered'
    plt.errorbar(scaled_unique_arrival_rates[:-1], [np.mean(x) for x in dd], yerr=[std_err(x) for x in dd], label=label)
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'frac-items-vs-arrival-rate-cond-deck.pdf'))
plt.show()
In [ ]:
# Final deck distribution for a few hand-picked arrival-rate conditions, to
# contrast sessions before vs. after the phase transition.
plt.xlabel('Deck')
plt.ylabel('Fraction of Items Seen During Session')
# fixed colors for the four cherry-picked condition indices below
colors = [None] * len(unique_arrival_rates[:-1])
colors[1] = 'red'
colors[3] = 'orange'
colors[7] = 'deepskyblue'
colors[10] = 'blue'
for i, (x, z) in enumerate(zip(unique_arrival_rates[:-1], scaled_unique_arrival_rates[:-1])):
    if not i in [1, 3, 7, 10]: # cherry-picked
        continue
    # per-deck lists of normalized fractions, one entry per session
    deck_distrns = [[] for _ in xrange(num_decks + 1)]
    for deck_distrn in final_deck_distrn_of_arrival_rate[x]:
        y = np.array(deck_distrn, dtype=float)
        y /= y.sum()
        for j, w in enumerate(y):
            deck_distrns[j].append(w)
    # indices <= 3 are labeled as pre-transition conditions, the rest as post
    plt.errorbar(
        range(1, len(deck_distrns) + 1), [np.mean(x) for x in deck_distrns],
        yerr=[std_err(x) for x in deck_distrns], label=r'$\lambda_{ext} = %0.3f$ (%s Phase Transition)' % (z, 'Before' if i <= 3 else 'After'),
        color=colors[i])
plt.xlim([0.5, num_decks + 1.5])
plt.legend(loc='best')
plt.savefig(os.path.join('figures', 'mturk', 'frac-items-vs-deck-cond-arrival-rate.pdf'))
plt.show()
In [ ]: