In [3]:
# August 8 2017
# This file is for experiments with student2 5 skills and training trajectory length 6,7,8
# to check different architectures and whether they can learn good models
# This uses student2a where transitions happen after observations
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *
def ixs2ranks(ixs):
    """Invert an ordering: turn sorted indices into per-item ranks.

    ixs: integer array in which ixs[r] is the index of the item placed at
         rank r (for example the output of np.argsort).
    Returns an integer array of the same shape; for a 1-D permutation,
    ranks[ixs[r]] == r.
    """
    # Builtin int replaces np.int, a deprecated alias of the very same type
    # that was removed in NumPy 1.24 (AttributeError on current releases).
    ranks = np.zeros(ixs.shape, dtype=int)
    for r, i in enumerate(ixs):
        ranks[i] = r
    return ranks
def graph_trainauc_score(vloss, scores, postfix=None):
    """Scatter each row's summed loss against its posttest score.

    vloss:   2-D array; each row is summed along axis 1 to give the x value.
    scores:  y values, one per row of vloss.
    postfix: when given, only the last `postfix` columns of vloss are summed.
    Opens a new figure and fixes the y range to [0, 1].
    """
    figure()
    # Either the whole curve or just its tail contributes to the sum
    window = vloss if postfix is None else vloss[:, -postfix:]
    auc = np.sum(window, axis=1)
    plot(auc, scores, '.')
    title('Training Loss AUC versus Actual Performance')
    xlabel('Training Loss AUC')
    ylabel('Posttest Score')
    ylim(0, 1)
def graph_trainauc_qval(vloss, qval):
    """Scatter each row's summed loss (axis 1 of vloss) against qval.

    Opens a new figure; axis limits are left to matplotlib.
    """
    figure()
    auc = np.sum(vloss, axis=1)
    plot(auc, qval, '.')
    title('Training Loss AUC versus Own Qvalue')
    xlabel('Training Loss AUC')
    ylabel('Qvalue')
def graph_qval_score(qval, scores):
    """Scatter qval (x axis) against posttest scores (y axis) in a new figure."""
    figure()
    plot(qval, scores, '.')
    title('Own Qval versus Actual Performance')
    xlabel('Qvalue')
    ylabel('Posttest Score')
def graph_msloss_score(msloss, scores):
    """Scatter multistep loss (x axis) against posttest scores (y axis).

    Opens a new figure and fixes the y range to [0, 1].
    """
    figure()
    plot(msloss, scores, '.')
    title('Multistep Loss versus Actual Performance')
    xlabel('Multistep Loss')
    ylabel('Posttest Score')
    ylim(0, 1)
In [5]:
# Loss curves for the mid GRU, l7 data, run lr0005A: average tloss/vloss over
# axis 0 and plot both curves on one figure.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
vloss = data11['vloss']

# Averages over axis 0 (presumably the independent runs -- TODO confirm)
tloss_mean = tloss.mean(axis=0)
vloss_mean = vloss.mean(axis=0)

# NOTE(review): this divides the std by n, not sqrt(n), so it is not a
# standard error; neither value is used in this cell.
tloss_std = tloss.std(axis=0) / len(tloss)
vloss_std = vloss.std(axis=0) / len(vloss)

figure()
plot(tloss_mean, label='tloss')
plot(vloss_mean, label='vloss')
title('Training Losses')
ylim(0, 0.0001)
legend()
# looks like 30-40 is enough, so settle on 40
Out[5]:
In [6]:
# Loss curves for the mid GRU, l6 data, run lr0005A: average tloss/vloss over
# axis 0 and plot both curves on one figure.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
vloss = data11['vloss']

# Averages over axis 0 (presumably the independent runs -- TODO confirm)
tloss_mean = tloss.mean(axis=0)
vloss_mean = vloss.mean(axis=0)

# NOTE(review): this divides the std by n, not sqrt(n), so it is not a
# standard error; neither value is used in this cell.
tloss_std = tloss.std(axis=0) / len(tloss)
vloss_std = vloss.std(axis=0) / len(vloss)

figure()
plot(tloss_mean, label='tloss')
plot(vloss_mean, label='vloss')
title('Training Losses')
ylim(0, 0.0001)
legend()
# looks like 50 is enough
Out[6]:
In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files of the same run (lr0005B).
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')

# Per-row loss summaries: sum over axis 1 ("auc") and mean of the last two columns
vloss = np.vstack((data11['vloss'],))
tloss = np.vstack((data11['tloss'],))
vloss_auc = vloss.sum(axis=1)
tloss_auc = tloss.sum(axis=1)
vloss_last = vloss[:, -2:].mean(axis=1)
tloss_last = tloss[:, -2:].mean(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things: descending for score and qval, ascending for loss
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
num_models = len(scores)

graph_trainauc_score(vloss, scores)
#xlim(0,0.001)
ylim(-0.1, 1.1)

graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1, 1.1)

graph_trainauc_qval(vloss, selfqvals)
ylim(-0.1, 1.1)
Out[4]:
In [5]:
# One line per model in sorted_by_qval order: index, score, vloss auc and
# selfqval, each followed by the model's rank under that criterion.
row_fmt = '{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    row = (ix,
           scores[ix], ranked_by_score[ix],
           vloss_auc[ix], ranked_by_vloss[ix],
           selfqvals[ix], ranked_by_qval[ix])
    six.print_(row_fmt.format(*row))
In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files of the same run (lr0005B).
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')

# Per-row loss summaries: sum over axis 1 ("auc") and mean of the last two columns
vloss = np.vstack((data11['vloss'],))
tloss = np.vstack((data11['tloss'],))
vloss_auc = vloss.sum(axis=1)
tloss_auc = tloss.sum(axis=1)
vloss_last = vloss[:, -2:].mean(axis=1)
tloss_last = tloss[:, -2:].mean(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things: descending for score and qval, ascending for loss
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
num_models = len(scores)

graph_trainauc_score(vloss, scores)
#xlim(0,0.001)
ylim(-0.1, 1.1)

graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1, 1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
Out[8]:
In [7]:
# One line per model in sorted_by_qval order: index, score, vloss auc and
# selfqval, each followed by the model's rank under that criterion.
row_fmt = '{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    row = (ix,
           scores[ix], ranked_by_score[ix],
           vloss_auc[ix], ranked_by_vloss[ix],
           selfqvals[ix], ranked_by_qval[ix])
    six.print_(row_fmt.format(*row))
In [2]:
# Loss curves for the mid GRU with noise0.05, l6 data, run lr0005A: average
# tloss/vloss over axis 0 and plot both curves on one figure.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
vloss = data11['vloss']

# Averages over axis 0 (presumably the independent runs -- TODO confirm)
tloss_mean = tloss.mean(axis=0)
vloss_mean = vloss.mean(axis=0)

# NOTE(review): this divides the std by n, not sqrt(n), so it is not a
# standard error; neither value is used in this cell.
tloss_std = tloss.std(axis=0) / len(tloss)
vloss_std = vloss.std(axis=0) / len(vloss)

figure()
plot(tloss_mean, label='tloss')
plot(vloss_mean, label='vloss')
title('Training Losses')
ylim(0, 0.0001)
legend()
# looks like 100 is enough
Out[2]:
In [3]:
# Loss curves for the mid GRU with noise0.05, l7 data, run lr0005A: average
# tloss/vloss over axis 0 and plot both curves on one figure.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
vloss = data11['vloss']

# Averages over axis 0 (presumably the independent runs -- TODO confirm)
tloss_mean = tloss.mean(axis=0)
vloss_mean = vloss.mean(axis=0)

# NOTE(review): this divides the std by n, not sqrt(n), so it is not a
# standard error; neither value is used in this cell.
tloss_std = tloss.std(axis=0) / len(tloss)
vloss_std = vloss.std(axis=0) / len(vloss)

figure()
plot(tloss_mean, label='tloss')
plot(vloss_mean, label='vloss')
title('Training Losses')
ylim(0, 0.0001)
legend()
# looks like 90 is enough
Out[3]:
In [3]:
# Mid GRU with noise0.05, l7 data, run lr0005B: relate training loss,
# multistep loss and self-reported qvalue to the posttest score.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files; data41: multistep loss.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')
data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/msloss-runlr0005B.npz')

# Per-row loss summaries: sum over axis 1 ("auc") and mean of the last two columns
vloss = np.vstack((data11['vloss'],))
vloss_auc = np.sum(vloss, axis=1)
vloss_last = np.mean(vloss[:, -2:], axis=1)
tloss = np.vstack((data11['tloss'],))
tloss_auc = np.sum(tloss, axis=1)
tloss_last = np.mean(tloss[:, -2:], axis=1)

# Kept 2-D (one row per model) so the multistep-loss plot below is unchanged
msloss = data41['msloss'][:, 0, :]

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)
sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
# Rank models by the first multistep-loss column. Calling argsort on the
# whole 2-D slice sorts within each row instead of ordering the models, which
# makes the resulting ranks meaningless.
# NOTE(review): column 0 is chosen to match the other msloss cells in this
# notebook -- confirm it is the intended criterion.
sorted_by_msloss = np.argsort(msloss[:, 0])
ranked_by_msloss = ixs2ranks(sorted_by_msloss)
sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
num_models = scores.shape[0]

graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1, 1.1)
# 48/50 good
graph_qval_score(selfqvals, scores)
ylim(-0.1, 1.1)
graph_msloss_score(msloss, scores)
ylim(-0.1, 1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
Out[3]:
In [5]:
# Mid GRU with noise0.05, l6 data, run lr0005B: relate training loss,
# multistep loss and self-reported qvalue to the posttest score.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files; data41: multistep loss.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')
data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/msloss-runlr0005B.npz')

# Per-row loss summaries: sum over axis 1 ("auc") and mean of the last two columns
vloss = np.vstack((data11['vloss'],))
tloss = np.vstack((data11['tloss'],))
vloss_auc = vloss.sum(axis=1)
tloss_auc = tloss.sum(axis=1)
vloss_last = vloss[:, -2:].mean(axis=1)
tloss_last = tloss[:, -2:].mean(axis=1)

# A single multistep-loss value per entry along the first axis
msloss = data41['msloss'][:, 0, 0]

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things: descending for score and qval, ascending for losses
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_msloss = np.argsort(msloss)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_msloss = ixs2ranks(sorted_by_msloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)
num_models = len(scores)

graph_trainauc_score(vloss, scores)
#xlim(0,0.001)
ylim(-0.1, 1.1)
# 6/50 good
graph_msloss_score(msloss, scores)
ylim(-0.1, 1.1)
graph_qval_score(selfqvals, scores)
ylim(-0.1, 1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
Out[5]:
In [6]:
# One line per model in sorted_by_msloss order: index, score, vloss auc and
# selfqval, each followed by the model's rank under that criterion.
row_fmt = '{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'
for i in six.moves.range(num_models):
    ix = sorted_by_msloss[i]
    row = (ix,
           scores[ix], ranked_by_score[ix],
           vloss_auc[ix], ranked_by_vloss[ix],
           selfqvals[ix], ranked_by_qval[ix])
    six.print_(row_fmt.format(*row))
In [ ]:
In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Dropout 0.8
Looking at correlation between training AUC and actual performance
'''
# data11: training statistics; data21: MCTS results from the "real1" file (run lr01A)
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout8-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout8-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')

vloss = data11['vloss']
# Column 0 of each MCTS array
mcts_scores = data21['scores']
mcts_qvals = data21['qvals']
scores = mcts_scores[:, 0]
qvals = mcts_qvals[:, 0]

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

# Both scatter plots keep the helpers' default axis limits
graph_trainauc_score(vloss, scores)
graph_qval_score(qvals, scores)
In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Paths point at the "small" model variant, run lr01A.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files.
data11 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr01A.npz')

# Loss summed over axis 1 ("auc"), one value per row
vloss = data11['vloss']
vloss_auc = vloss.sum(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things: descending for score and qval, ascending for loss
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
num_models = len(scores)

# looks like 5/50 are good
graph_trainauc_score(vloss, scores)
#xlim(0,0.001)
ylim(-0.1, 1.1)

graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1, 1.1)

graph_trainauc_qval(vloss, selfqvals)
ylim(-0.1, 1.1)
Out[8]:
In [10]:
# One line per model in sorted_by_qval order: index, score, vloss auc and
# selfqval, each followed by the model's rank under that criterion.
row_fmt = '{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    row = (ix,
           scores[ix], ranked_by_score[ix],
           vloss_auc[ix], ranked_by_vloss[ix],
           selfqvals[ix], ranked_by_qval[ix])
    six.print_(row_fmt.format(*row))
# sorting by self-reported qval seems like it may work
In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Paths point at the "small" model variant, run lr01A.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files.
data11 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr01A.npz')

# Loss summed over axis 1 ("auc"), one value per row
vloss = data11['vloss']
vloss_auc = vloss.sum(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]

# ixs sorted by various things: descending for score and qval, ascending for loss
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
num_models = len(scores)

# looks like 30/50 are good
# 0-11 top vloss look good, except ix2 is slightly stochastic
graph_trainauc_score(vloss, scores)
#xlim(0,0.001)
ylim(-0.1, 1.1)

# 0-17 top qvals are good
graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1, 1.1)

graph_trainauc_qval(vloss, selfqvals)
ylim(-0.1, 1.1)
Out[4]:
In [7]:
# One line per model in sorted_by_vloss order: index, score, vloss auc and
# selfqval, each followed by the model's rank under that criterion.
row_fmt = '{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'
for i in six.moves.range(num_models):
    ix = sorted_by_vloss[i]
    row = (ix,
           scores[ix], ranked_by_score[ix],
           vloss_auc[ix], ranked_by_vloss[ix],
           selfqvals[ix], ranked_by_qval[ix])
    six.print_(row_fmt.format(*row))
In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Paths point at the mid-noise0.01 variant, run lr001A.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files. The multistep-loss file is not loaded.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr001A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr001A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr001A.npz')
#data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/msloss-runlr01A.npz')

# Only the last three columns of each loss array are kept, so the "auc"
# values below are sums over that tail.
vloss = data11['vloss'][:, -3:]
tloss = data11['tloss'][:, -3:]
vloss_auc = vloss.sum(axis=1)
tloss_auc = tloss.sum(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]
#msloss = data41['msloss'][:,0,:]

# ixs sorted by various things: descending for score and qval, ascending for losses
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_tloss = np.argsort(tloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_tloss = ixs2ranks(sorted_by_tloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)
#sorted_by_msloss = (np.argsort(msloss[:,0]))
#ranked_by_msloss = ixs2ranks(sorted_by_msloss)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
#six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)
num_models = len(scores)

# 40/50 are good models + 1 almost good
graph_trainauc_score(vloss, scores)
#xlim(0,0.002)
ylim(-0.1, 1.1)

graph_qval_score(selfqvals, scores)
#xlim(0.95,1.0)
ylim(-0.1, 1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
#graph_msloss_score(msloss, scores, postfix=None)
#xlim(0,0.00002)
#ylim(-0.1,1.1)
#figure()
#plot(ranked_by_tloss,ranked_by_msloss,'.')
Out[4]:
In [6]:
# One line per model in sorted_by_qval order: index, score, training-loss auc,
# validation-loss auc and selfqval, each followed by the model's rank under
# that criterion.
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    # Variant that also prints the multistep loss (needs msloss / ranked_by_msloss):
    #six.print_('{:2d} | score {:.1f} {:2d}th | tloss auc {:.5f} {:2d}th | vloss auc {:.5f} {:2d}th | msloss {:.8f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
    #    ix,scores[ix], ranked_by_score[ix], tloss_auc[ix], ranked_by_tloss[ix], vloss_auc[ix], ranked_by_vloss[ix], msloss[ix,0], ranked_by_msloss[ix], selfqvals[ix], ranked_by_qval[ix]))
    # The first loss column carries tloss_auc, so it is labelled 'tloss auc';
    # it was previously printed under a second 'vloss auc' label.
    six.print_('{:2d} | score {:.1f} {:2d}th | tloss auc {:.5f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix, scores[ix], ranked_by_score[ix], tloss_auc[ix], ranked_by_tloss[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))
In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Paths point at the mid-noise0.01 variant, run lr001A.
# data11: training statistics; data21 / data31: MCTS results from the
# "real1" and "real0" files. The multistep-loss file is not loaded.
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr001A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr001A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr001A.npz')
#data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/msloss-runlr01A.npz')

# Only the last three columns of each loss array are kept, so the "auc"
# values below are sums over that tail.
vloss = data11['vloss'][:, -3:]
tloss = data11['tloss'][:, -3:]
vloss_auc = vloss.sum(axis=1)
tloss_auc = tloss.sum(axis=1)

# Column 0 of each MCTS array
scores = np.vstack((data21['scores'],))[:, 0]
realqvals = np.vstack((data21['qvals'],))[:, 0]
selfqvals = np.vstack((data31['qvals'],))[:, 0]
#msloss = data41['msloss'][:,0,:]

# ixs sorted by various things: descending for score and qval, ascending for losses
sorted_by_score = np.argsort(scores)[::-1]
sorted_by_vloss = np.argsort(vloss_auc)
sorted_by_tloss = np.argsort(tloss_auc)
sorted_by_qval = np.argsort(selfqvals)[::-1]
ranked_by_score = ixs2ranks(sorted_by_score)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)
ranked_by_tloss = ixs2ranks(sorted_by_tloss)
ranked_by_qval = ixs2ranks(sorted_by_qval)
#sorted_by_msloss = (np.argsort(msloss[:,0]))
#ranked_by_msloss = ixs2ranks(sorted_by_msloss)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
#six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)
num_models = len(scores)

# 2/50 are good
graph_trainauc_score(vloss, scores)
#xlim(0,0.002)
ylim(-0.1, 1.1)

graph_qval_score(selfqvals, scores)
#xlim(0.95,1.0)
ylim(-0.1, 1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
#graph_msloss_score(msloss, scores, postfix=None)
#xlim(0,0.00002)
#ylim(-0.1,1.1)
#figure()
#plot(ranked_by_tloss,ranked_by_msloss,'.')
Out[8]:
In [ ]: