In [3]:
# August 8 2017
# This file is for experiments with student2 5 skills and training trajectory length 6,7,8
# to check different architectures and whether they can learn good models
# This uses student2a where transitions happen after observations

%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *

def ixs2ranks(ixs):
    ranks = np.zeros(ixs.shape,dtype=np.int)
    for r,i in enumerate(ixs):
        ranks[i] = r
    return ranks

def graph_trainauc_score(vloss, scores, postfix=None):
    figure()
    if postfix is not None:
        xs = np.sum(vloss[:,-postfix:],axis=1)
    else:
        xs = np.sum(vloss,axis=1)
    ys = scores
    title('Training Loss AUC versus Actual Performance')
    xlabel('Training Loss AUC')
    ylabel('Posttest Score')
    plot(xs,ys,'.')
    ylim(0,1)

def graph_trainauc_qval(vloss, qval):
    figure()
    xs = np.sum(vloss,axis=1)
    ys = qval
    title('Training Loss AUC versus Own Qvalue')
    xlabel('Training Loss AUC')
    ylabel('Qvalue')
    plot(xs,ys,'.')

def graph_qval_score(qval, scores):
    figure()
    xs = qval
    ys = scores
    title('Own Qval versus Actual Performance')
    xlabel('Qvalue')
    ylabel('Posttest Score')
    plot(xs,ys,'.')

def graph_msloss_score(msloss, scores):
    figure()
    xs = msloss
    ys = scores
    title('Multistep Loss versus Actual Performance')
    xlabel('Multistep Loss')
    ylabel('Posttest Score')
    plot(xs,ys,'.')
    ylim(0,1)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Training Trajectory 7 No Noise Learning Rate 0.0005 Stability


In [5]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
tloss_mean = np.mean(tloss,axis=0)
tloss_std = np.std(tloss,axis=0)/tloss.shape[0]

vloss = data11['vloss']
vloss_mean = np.mean(vloss,axis=0)
vloss_std = np.std(vloss,axis=0)/vloss.shape[0]

figure()
title('Training Losses')
plot(tloss_mean,label='tloss')
plot(vloss_mean,label='vloss')
ylim(0,0.0001)
legend()
# looks like 30-40 is enough, so settle on 40


Out[5]:
<matplotlib.legend.Legend at 0x7fe2ef7f2eb8>

Training Trajectory 6 No Noise Learning Rate 0.0005 Stability


In [6]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
tloss_mean = np.mean(tloss,axis=0)
tloss_std = np.std(tloss,axis=0)/tloss.shape[0]

vloss = data11['vloss']
vloss_mean = np.mean(vloss,axis=0)
vloss_std = np.std(vloss,axis=0)/vloss.shape[0]

figure()
title('Training Losses')
plot(tloss_mean,label='tloss')
plot(vloss_mean,label='vloss')
ylim(0,0.0001)
legend()
# looks like 50 is enough


Out[6]:
<matplotlib.legend.Legend at 0x7fe2ef72c3c8>

Training Trajectory 6 LR 0.0005 No Noise


In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')

vloss = np.vstack((data11['vloss'],))
vloss_auc = np.sum(vloss,axis=1)
vloss_last = np.mean(vloss[:,-2:],axis=1)

tloss = np.vstack((data11['tloss'],))
tloss_auc = np.sum(tloss,axis=1)
tloss_last = np.mean(tloss[:,-2:],axis=1)

scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

num_models = scores.shape[0]

graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1,1.1)
graph_trainauc_qval(vloss,selfqvals)
ylim(-0.1,1.1)


vloss shape (50, 51)
scores shape (50,)
[ 0.     0.     0.     0.     0.     0.     0.     0.     1.     0.     0.
  0.     0.     0.     0.     0.     0.     0.     0.     0.     1.     0.25
  1.     0.     0.     0.     0.     0.     0.375  0.     0.     0.125  0.
  0.     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
  0.     0.     0.     0.     0.25   0.   ]
Out[4]:
(-0.1, 1.1)

In [5]:
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))


31 | score 0.1  6th | vloss auc 0.04219 37th | selfqval 0.8115  0th
40 | score 0.0 36th | vloss auc 0.04506 41th | selfqval 0.7653  1th
29 | score 0.0 46th | vloss auc 0.04027 33th | selfqval 0.6918  2th
42 | score 0.0 34th | vloss auc 0.02753  2th | selfqval 0.5917  3th
41 | score 0.0 35th | vloss auc 0.03562 21th | selfqval 0.5409  4th
21 | score 0.2  5th | vloss auc 0.03818 29th | selfqval 0.4772  5th
10 | score 0.0 16th | vloss auc 0.02863  4th | selfqval 0.4588  6th
37 | score 0.0 38th | vloss auc 0.04433 40th | selfqval 0.4242  7th
19 | score 0.0  8th | vloss auc 0.03841 31th | selfqval 0.3376  8th
35 | score 0.0 41th | vloss auc 0.03066  7th | selfqval 0.2872  9th
33 | score 0.0 43th | vloss auc 0.03515 18th | selfqval 0.2228 10th
 2 | score 0.0 23th | vloss auc 0.02894  5th | selfqval 0.2090 11th
24 | score 0.0 26th | vloss auc 0.04093 34th | selfqval 0.1829 12th
 6 | score 0.0 19th | vloss auc 0.05711 49th | selfqval 0.1193 13th
13 | score 0.0  7th | vloss auc 0.04295 38th | selfqval 0.1084 14th
 0 | score 0.0 49th | vloss auc 0.03352 15th | selfqval 0.0413 15th
 9 | score 0.0 17th | vloss auc 0.03654 24th | selfqval 0.0392 16th
 8 | score 1.0  1th | vloss auc 0.04790 45th | selfqval 0.0390 17th
26 | score 0.0 48th | vloss auc 0.03319 13th | selfqval 0.0386 18th
22 | score 1.0  2th | vloss auc 0.02739  1th | selfqval 0.0289 19th
 1 | score 0.0 24th | vloss auc 0.03408 16th | selfqval 0.0270 20th
14 | score 0.0 13th | vloss auc 0.03594 22th | selfqval 0.0240 21th
39 | score 0.0 37th | vloss auc 0.04605 43th | selfqval 0.0207 22th
17 | score 0.0 10th | vloss auc 0.04925 47th | selfqval 0.0174 23th
45 | score 0.0 31th | vloss auc 0.03111  8th | selfqval 0.0160 24th
20 | score 1.0  0th | vloss auc 0.04191 36th | selfqval 0.0069 25th
47 | score 0.0 29th | vloss auc 0.03417 17th | selfqval 0.0066 26th
27 | score 0.0 47th | vloss auc 0.03685 25th | selfqval 0.0056 27th
43 | score 0.0 33th | vloss auc 0.03631 23th | selfqval 0.0052 28th
11 | score 0.0 15th | vloss auc 0.03939 32th | selfqval 0.0037 29th
28 | score 0.4  3th | vloss auc 0.03151  9th | selfqval 0.0024 30th
 3 | score 0.0 22th | vloss auc 0.03834 30th | selfqval 0.0023 31th
12 | score 0.0 25th | vloss auc 0.04155 35th | selfqval 0.0018 32th
 5 | score 0.0 20th | vloss auc 0.03797 28th | selfqval 0.0018 33th
30 | score 0.0 45th | vloss auc 0.03701 26th | selfqval 0.0007 34th
 4 | score 0.0 21th | vloss auc 0.04750 44th | selfqval 0.0004 35th
46 | score 0.0 30th | vloss auc 0.02618  0th | selfqval 0.0004 36th
25 | score 0.0 39th | vloss auc 0.03312 12th | selfqval 0.0003 37th
48 | score 0.2  4th | vloss auc 0.03738 27th | selfqval 0.0003 38th
16 | score 0.0 11th | vloss auc 0.02999  6th | selfqval 0.0003 39th
32 | score 0.0 44th | vloss auc 0.04814 46th | selfqval 0.0003 40th
 7 | score 0.0 18th | vloss auc 0.02795  3th | selfqval 0.0002 41th
49 | score 0.0 14th | vloss auc 0.05158 48th | selfqval 0.0001 42th
34 | score 0.0 42th | vloss auc 0.03522 19th | selfqval 0.0001 43th
36 | score 0.0 40th | vloss auc 0.03207 11th | selfqval 0.0001 44th
15 | score 0.0 12th | vloss auc 0.03527 20th | selfqval 0.0000 45th
38 | score 0.0 28th | vloss auc 0.03330 14th | selfqval 0.0000 46th
44 | score 0.0 32th | vloss auc 0.04351 39th | selfqval 0.0000 47th
23 | score 0.0 27th | vloss auc 0.03198 10th | selfqval 0.0000 48th
18 | score 0.0  9th | vloss auc 0.04584 42th | selfqval 0.0000 49th

Training Trajectories Length 7 LR 0.0005 No Noise


In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')

vloss = np.vstack((data11['vloss'],))
vloss_auc = np.sum(vloss,axis=1)
vloss_last = np.mean(vloss[:,-2:],axis=1)

tloss = np.vstack((data11['tloss'],))
tloss_auc = np.sum(tloss,axis=1)
tloss_last = np.mean(tloss[:,-2:],axis=1)

scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

num_models = scores.shape[0]

graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1,1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)


vloss shape (50, 41)
scores shape (50,)
[ 1.     1.     1.     1.     0.     1.     1.     1.     1.     1.     0.
  1.     1.     1.     1.     0.     1.     1.     1.     1.     1.     1.
  1.     1.     1.     1.     1.     0.     1.     1.     1.     1.     1.
  1.     1.     1.     1.     1.     1.     1.     1.     1.     0.     1.
  0.     0.     1.     0.     1.     0.375]
Out[8]:
(-0.1, 1.1)

In [7]:
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))


30 | score 1.0 38th | vloss auc 0.03541  2th | selfqval 0.9449  0th
24 | score 1.0  0th | vloss auc 0.03748  6th | selfqval 0.9341  1th
12 | score 1.0 11th | vloss auc 0.04685 32th | selfqval 0.8833  2th
34 | score 1.0 34th | vloss auc 0.04576 29th | selfqval 0.8697  3th
41 | score 1.0 28th | vloss auc 0.06233 47th | selfqval 0.8412  4th
13 | score 1.0  1th | vloss auc 0.05403 41th | selfqval 0.8375  5th
 2 | score 1.0 20th | vloss auc 0.04464 24th | selfqval 0.8244  6th
23 | score 1.0  2th | vloss auc 0.05828 45th | selfqval 0.8155  7th
 5 | score 1.0 18th | vloss auc 0.04571 27th | selfqval 0.8082  8th
38 | score 1.0 33th | vloss auc 0.05252 39th | selfqval 0.8032  9th
 6 | score 1.0 17th | vloss auc 0.04337 20th | selfqval 0.8023 10th
25 | score 1.0 12th | vloss auc 0.03592  3th | selfqval 0.7943 11th
17 | score 1.0  8th | vloss auc 0.05410 42th | selfqval 0.7923 12th
 8 | score 1.0 15th | vloss auc 0.04534 26th | selfqval 0.7755 13th
15 | score 0.0 42th | vloss auc 0.06502 48th | selfqval 0.7751 14th
 7 | score 1.0 16th | vloss auc 0.04691 33th | selfqval 0.7705 15th
16 | score 1.0  9th | vloss auc 0.03847  8th | selfqval 0.7592 16th
14 | score 1.0 10th | vloss auc 0.04061 11th | selfqval 0.7488 17th
 9 | score 1.0 14th | vloss auc 0.04792 34th | selfqval 0.7298 18th
31 | score 1.0 37th | vloss auc 0.04573 28th | selfqval 0.7006 19th
35 | score 1.0 25th | vloss auc 0.04224 18th | selfqval 0.6919 20th
 0 | score 1.0 23th | vloss auc 0.04302 19th | selfqval 0.6915 21th
22 | score 1.0  3th | vloss auc 0.04086 12th | selfqval 0.6872 22th
19 | score 1.0  6th | vloss auc 0.04180 15th | selfqval 0.6543 23th
 3 | score 1.0 19th | vloss auc 0.03837  7th | selfqval 0.6529 24th
39 | score 1.0 30th | vloss auc 0.05764 44th | selfqval 0.6502 25th
42 | score 0.0 45th | vloss auc 0.04662 31th | selfqval 0.6444 26th
10 | score 0.0 44th | vloss auc 0.03721  4th | selfqval 0.6420 27th
29 | score 1.0 39th | vloss auc 0.05093 38th | selfqval 0.5927 28th
28 | score 1.0 40th | vloss auc 0.04182 16th | selfqval 0.5693 29th
26 | score 1.0 24th | vloss auc 0.03470  1th | selfqval 0.5379 30th
36 | score 1.0 32th | vloss auc 0.05645 43th | selfqval 0.5307 31th
43 | score 1.0 27th | vloss auc 0.03146  0th | selfqval 0.5304 32th
47 | score 0.0 49th | vloss auc 0.04406 23th | selfqval 0.4941 33th
32 | score 1.0 36th | vloss auc 0.04588 30th | selfqval 0.4812 34th
20 | score 1.0  5th | vloss auc 0.04341 21th | selfqval 0.4779 35th
21 | score 1.0  4th | vloss auc 0.03942  9th | selfqval 0.4742 36th
40 | score 1.0 29th | vloss auc 0.04024 10th | selfqval 0.4567 37th
11 | score 1.0 13th | vloss auc 0.04190 17th | selfqval 0.4135 38th
18 | score 1.0  7th | vloss auc 0.04370 22th | selfqval 0.3380 39th
27 | score 0.0 43th | vloss auc 0.05047 36th | selfqval 0.3278 40th
33 | score 1.0 35th | vloss auc 0.04500 25th | selfqval 0.3228 41th
48 | score 1.0 22th | vloss auc 0.04869 35th | selfqval 0.2936 42th
44 | score 0.0 46th | vloss auc 0.04173 14th | selfqval 0.2379 43th
37 | score 1.0 31th | vloss auc 0.04093 13th | selfqval 0.1779 44th
49 | score 0.4 41th | vloss auc 0.05078 37th | selfqval 0.1434 45th
 1 | score 1.0 21th | vloss auc 0.06050 46th | selfqval 0.1262 46th
46 | score 1.0 26th | vloss auc 0.03738  5th | selfqval 0.1012 47th
45 | score 0.0 48th | vloss auc 0.08368 49th | selfqval 0.0043 48th
 4 | score 0.0 47th | vloss auc 0.05331 40th | selfqval 0.0007 49th

Training Length 6 Noise 0.05 LR 0.0005 Stability


In [2]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
tloss_mean = np.mean(tloss,axis=0)
tloss_std = np.std(tloss,axis=0)/tloss.shape[0]

vloss = data11['vloss']
vloss_mean = np.mean(vloss,axis=0)
vloss_std = np.std(vloss,axis=0)/vloss.shape[0]

figure()
title('Training Losses')
plot(tloss_mean,label='tloss')
plot(vloss_mean,label='vloss')
ylim(0,0.0001)
legend()
# looks like wants 100 is enough


Out[2]:
<matplotlib.legend.Legend at 0x7f7572647da0>

Training Length 7 Noise 0.05 LR 0.0005 Stability


In [3]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005A.npz')

tloss = data11['tloss']
tloss_mean = np.mean(tloss,axis=0)
tloss_std = np.std(tloss,axis=0)/tloss.shape[0]

vloss = data11['vloss']
vloss_mean = np.mean(vloss,axis=0)
vloss_std = np.std(vloss,axis=0)/vloss.shape[0]

figure()
title('Training Losses')
plot(tloss_mean,label='tloss')
plot(vloss_mean,label='vloss')
ylim(0,0.0001)
legend()
# looks like 90 is enough


Out[3]:
<matplotlib.legend.Legend at 0x7f7572533ef0>

Length 7 Noise 0.05


In [3]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')
data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/msloss-runlr0005B.npz')

vloss = np.vstack((data11['vloss'],))
vloss_auc = np.sum(vloss,axis=1)
vloss_last = np.mean(vloss[:,-2:],axis=1)

tloss = np.vstack((data11['tloss'],))
tloss_auc = np.sum(tloss,axis=1)
tloss_last = np.mean(tloss[:,-2:],axis=1)

msloss = data41['msloss'][:,0,:]

scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_msloss = np.argsort(msloss)
ranked_by_msloss = ixs2ranks(sorted_by_msloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

num_models = scores.shape[0]

graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
# 48/50 good

graph_qval_score(selfqvals, scores)
ylim(-0.1,1.1)

graph_msloss_score(msloss, scores)
ylim(-0.1,1.1)

#graph_trainauc_(vloss,selfqvals)
#ylim(-0.1,1.1)


vloss shape (50, 91)
scores shape (50,)
[ 1.     1.     1.     1.     1.     1.     1.     1.     1.     1.     1.
  1.     1.     1.     1.     1.     1.     1.     1.     0.     1.     1.
  0.375  1.     1.     1.     1.     1.     1.     1.     1.     1.     1.
  1.     1.     1.     1.     1.     1.     1.     1.     1.     1.     1.
  1.     1.     1.     1.     1.     1.   ]
Out[3]:
(-0.1, 1.1)

Length 6 Noise 0.05


In [5]:
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr0005B.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr0005B.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr0005B.npz')
data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/msloss-runlr0005B.npz')

vloss = np.vstack((data11['vloss'],))
vloss_auc = np.sum(vloss,axis=1)
vloss_last = np.mean(vloss[:,-2:],axis=1)

tloss = np.vstack((data11['tloss'],))
tloss_auc = np.sum(tloss,axis=1)
tloss_last = np.mean(tloss[:,-2:],axis=1)

msloss = data41['msloss'][:,0,0]

scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_msloss = np.argsort(msloss)
ranked_by_msloss = ixs2ranks(sorted_by_msloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)

num_models = scores.shape[0]

graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
# 6/50 good

graph_msloss_score(msloss, scores)
ylim(-0.1,1.1)

graph_qval_score(selfqvals, scores)
ylim(-0.1,1.1)

#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)


vloss shape (50, 101)
scores shape (50,)
msloss shape (50,)
[ 0.25   0.25   0.75   0.     0.     1.     0.     0.     0.875  0.     0.
  0.375  1.     0.25   0.     0.     1.     0.     0.     0.25   1.     0.625
  0.     0.     0.     0.     0.     0.     0.125  0.25   0.     0.     0.
  0.625  0.     0.     0.     0.     0.     0.625  0.     0.     1.     1.
  0.     0.     0.     0.125  0.     0.   ]
Out[5]:
(-0.1, 1.1)

In [6]:
for i in six.moves.range(num_models):
    ix = sorted_by_msloss[i]
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))


42 | score 1.0  2th | vloss auc 0.02881  2th | selfqval 0.8591  5th
30 | score 0.0 44th | vloss auc 0.03728 19th | selfqval 0.8093  8th
20 | score 1.0  5th | vloss auc 0.03258  8th | selfqval 0.6495 14th
18 | score 0.0 27th | vloss auc 0.03974 23th | selfqval 0.4393 20th
15 | score 0.0 25th | vloss auc 0.03084  3th | selfqval 0.9396  0th
 5 | score 1.0  3th | vloss auc 0.03678 16th | selfqval 0.9255  1th
41 | score 0.0 35th | vloss auc 0.04051 29th | selfqval 0.0332 33th
49 | score 0.0 29th | vloss auc 0.04256 34th | selfqval 0.7553 10th
 4 | score 0.0 22th | vloss auc 0.04532 39th | selfqval 0.0229 34th
35 | score 0.0 40th | vloss auc 0.03990 24th | selfqval 0.0399 30th
34 | score 0.0 41th | vloss auc 0.06308 49th | selfqval 0.3091 24th
21 | score 0.6  8th | vloss auc 0.05584 45th | selfqval 0.2380 27th
14 | score 0.0 24th | vloss auc 0.04326 35th | selfqval 0.9037  2th
 8 | score 0.9  6th | vloss auc 0.04041 28th | selfqval 0.3502 23th
39 | score 0.6 10th | vloss auc 0.03573 13th | selfqval 0.0036 38th
40 | score 0.0 36th | vloss auc 0.04658 40th | selfqval 0.6427 16th
32 | score 0.0 42th | vloss auc 0.05452 44th | selfqval 0.6810 13th
37 | score 0.0 38th | vloss auc 0.05025 43th | selfqval 0.7771  9th
24 | score 0.0 49th | vloss auc 0.03483 11th | selfqval 0.8118  7th
12 | score 1.0  4th | vloss auc 0.03131  4th | selfqval 0.4624 19th
23 | score 0.0 31th | vloss auc 0.04008 25th | selfqval 0.0205 35th
44 | score 0.0 34th | vloss auc 0.03272  9th | selfqval 0.1683 28th
48 | score 0.0 48th | vloss auc 0.04010 26th | selfqval 0.7205 11th
22 | score 0.0 30th | vloss auc 0.04147 32th | selfqval 0.0021 40th
38 | score 0.0 37th | vloss auc 0.03170  6th | selfqval 0.8761  3th
47 | score 0.1 18th | vloss auc 0.02805  0th | selfqval 0.0746 29th
25 | score 0.0 47th | vloss auc 0.04990 42th | selfqval 0.8442  6th
27 | score 0.0 45th | vloss auc 0.03528 12th | selfqval 0.3631 22th
 7 | score 0.0 20th | vloss auc 0.03914 21th | selfqval 0.8658  4th
33 | score 0.6  9th | vloss auc 0.04053 30th | selfqval 0.3842 21th
17 | score 0.0 26th | vloss auc 0.03139  5th | selfqval 0.0000 48th
45 | score 0.0 33th | vloss auc 0.05663 46th | selfqval 0.2727 25th
43 | score 1.0  1th | vloss auc 0.04454 38th | selfqval 0.0014 41th
28 | score 0.1 17th | vloss auc 0.04017 27th | selfqval 0.0042 37th
13 | score 0.2 14th | vloss auc 0.04408 36th | selfqval 0.0384 31th
16 | score 1.0  0th | vloss auc 0.03222  7th | selfqval 0.0000 49th
 6 | score 0.0 21th | vloss auc 0.03342 10th | selfqval 0.0032 39th
 3 | score 0.0 23th | vloss auc 0.03621 14th | selfqval 0.0012 42th
 2 | score 0.8  7th | vloss auc 0.02837  1th | selfqval 0.0005 43th
46 | score 0.0 32th | vloss auc 0.04429 37th | selfqval 0.0003 44th
29 | score 0.2 13th | vloss auc 0.03634 15th | selfqval 0.0375 32th
 1 | score 0.2 16th | vloss auc 0.03879 20th | selfqval 0.2719 26th
19 | score 0.2 12th | vloss auc 0.03706 18th | selfqval 0.0187 36th
 0 | score 0.2 15th | vloss auc 0.06153 47th | selfqval 0.5803 18th
26 | score 0.0 46th | vloss auc 0.04251 33th | selfqval 0.6956 12th
11 | score 0.4 11th | vloss auc 0.06264 48th | selfqval 0.6489 15th
10 | score 0.0 19th | vloss auc 0.04108 31th | selfqval 0.5973 17th
31 | score 0.0 43th | vloss auc 0.03972 22th | selfqval 0.0001 47th
 9 | score 0.0 28th | vloss auc 0.04715 41th | selfqval 0.0001 46th
36 | score 0.0 39th | vloss auc 0.03688 17th | selfqval 0.0001 45th

Old dropout result


In [ ]:


In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Dropout 0.8
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_mid-dropout8-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-dropout8-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')

vloss = data11['vloss']
scores = data21['scores'][:,0]
qvals = data21['qvals'][:,0]

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

graph_trainauc_score(vloss, scores)
graph_qval_score(qvals, scores)


vloss shape (20, 41)
scores shape (20,)
[ 0.     1.     0.     0.     0.     0.     0.     0.     0.     0.     0.375
  0.     1.     0.     0.     0.     0.     1.     0.     0.   ]

Small Size Training Length 6


In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr01A.npz')


vloss = data11['vloss']
vloss_auc = np.sum(vloss,axis=1)
scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

num_models = scores.shape[0]

# looks like 5/50 are good
graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)for i in six.moves.range(num_models):
ylim(-0.1,1.1)
graph_trainauc_qval(vloss,selfqvals)
ylim(-0.1,1.1)


vloss shape (50, 51)
scores shape (50,)
[ 0.625  1.     0.     0.     0.     0.     0.125  0.     1.     0.     1.
  0.     0.     0.     0.     0.     0.     0.     0.     1.     0.5    0.
  0.     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
  0.     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
  0.     1.     0.     0.     0.625  0.   ]
Out[8]:
(-0.1, 1.1)

In [10]:
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))
# sorting by self-reported qval seems like it may work


 1 | score 1.0  1th | vloss auc 0.01306 49th | selfqval 0.9485  0th
 8 | score 1.0  0th | vloss auc 0.00226 21th | selfqval 0.9433  1th
18 | score 0.0 14th | vloss auc 0.00286 34th | selfqval 0.9342  2th
39 | score 0.0 36th | vloss auc 0.00234 24th | selfqval 0.8614  3th
19 | score 1.0  2th | vloss auc 0.00167  8th | selfqval 0.7877  4th
15 | score 0.0 10th | vloss auc 0.00188 11th | selfqval 0.4969  5th
44 | score 0.0 31th | vloss auc 0.01068 48th | selfqval 0.4100  6th
45 | score 1.0  3th | vloss auc 0.00149  6th | selfqval 0.3622  7th
46 | score 0.0 30th | vloss auc 0.00267 30th | selfqval 0.3561  8th
 6 | score 0.1  8th | vloss auc 0.00233 22th | selfqval 0.2906  9th
48 | score 0.6  6th | vloss auc 0.00138  4th | selfqval 0.2349 10th
22 | score 0.0 11th | vloss auc 0.00253 29th | selfqval 0.1519 11th
35 | score 0.0 40th | vloss auc 0.00176  9th | selfqval 0.1212 12th
 4 | score 0.0 23th | vloss auc 0.00147  5th | selfqval 0.0594 13th
32 | score 0.0 43th | vloss auc 0.00179 10th | selfqval 0.0233 14th
26 | score 0.0 39th | vloss auc 0.00331 41th | selfqval 0.0228 15th
34 | score 0.0 41th | vloss auc 0.00120  1th | selfqval 0.0226 16th
36 | score 0.0 38th | vloss auc 0.00553 46th | selfqval 0.0210 17th
31 | score 0.0 44th | vloss auc 0.00194 13th | selfqval 0.0138 18th
14 | score 0.0 17th | vloss auc 0.00194 14th | selfqval 0.0107 19th
29 | score 0.0 46th | vloss auc 0.00239 26th | selfqval 0.0063 20th
38 | score 0.0 37th | vloss auc 0.00280 33th | selfqval 0.0060 21th
11 | score 0.0 25th | vloss auc 0.00138  3th | selfqval 0.0023 22th
27 | score 0.0 48th | vloss auc 0.00197 16th | selfqval 0.0021 23th
30 | score 0.0 45th | vloss auc 0.00190 12th | selfqval 0.0014 24th
47 | score 0.0 29th | vloss auc 0.00239 27th | selfqval 0.0010 25th
41 | score 0.0 34th | vloss auc 0.00194 15th | selfqval 0.0007 26th
13 | score 0.0 18th | vloss auc 0.00223 20th | selfqval 0.0006 27th
 3 | score 0.0 21th | vloss auc 0.00163  7th | selfqval 0.0005 28th
20 | score 0.5  7th | vloss auc 0.00372 44th | selfqval 0.0003 29th
17 | score 0.0 15th | vloss auc 0.00110  0th | selfqval 0.0002 30th
42 | score 0.0 33th | vloss auc 0.00235 25th | selfqval 0.0002 31th
21 | score 0.0 12th | vloss auc 0.00274 31th | selfqval 0.0001 32th
33 | score 0.0 42th | vloss auc 0.00277 32th | selfqval 0.0000 33th
24 | score 0.0 49th | vloss auc 0.00320 39th | selfqval 0.0000 34th
12 | score 0.0 19th | vloss auc 0.00223 19th | selfqval 0.0000 35th
10 | score 1.0  4th | vloss auc 0.00308 38th | selfqval 0.0000 36th
 0 | score 0.6  5th | vloss auc 0.00297 37th | selfqval 0.0000 37th
 7 | score 0.0 24th | vloss auc 0.00234 23th | selfqval 0.0000 38th
16 | score 0.0 16th | vloss auc 0.00363 43th | selfqval 0.0000 39th
40 | score 0.0 35th | vloss auc 0.00329 40th | selfqval 0.0000 40th
49 | score 0.0 26th | vloss auc 0.00296 35th | selfqval 0.0000 41th
 2 | score 0.0 13th | vloss auc 0.00253 28th | selfqval 0.0000 42th
43 | score 0.0 32th | vloss auc 0.00136  2th | selfqval 0.0000 43th
 9 | score 0.0 22th | vloss auc 0.00461 45th | selfqval 0.0000 44th
37 | score 0.0 28th | vloss auc 0.00296 36th | selfqval 0.0000 45th
25 | score 0.0 27th | vloss auc 0.00211 18th | selfqval 0.0000 46th
 5 | score 0.0  9th | vloss auc 0.00739 47th | selfqval 0.0000 47th
28 | score 0.0 47th | vloss auc 0.00206 17th | selfqval 0.0000 48th
23 | score 0.0 20th | vloss auc 0.00340 42th | selfqval 0.0000 49th

Small size Training Length 7


In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr01A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr01A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_small-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr01A.npz')


vloss = data11['vloss']
vloss_auc = np.sum(vloss,axis=1)
scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)

num_models = scores.shape[0]

# looks like 30/50 are good
# 0-11 top vloss look good, except ix2 is slightly stochastic
graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.001)
ylim(-0.1,1.1)
# 0-17 top qvals are good
graph_qval_score(selfqvals, scores)
#xlim(0.9,1.0)
ylim(-0.1,1.1)
graph_trainauc_qval(vloss,selfqvals)
ylim(-0.1,1.1)


vloss shape (50, 41)
scores shape (50,)
[ 0.    1.    1.    1.    1.    0.    1.    1.    1.    1.    0.75  0.    1.
  1.    1.    0.    1.    1.    0.    0.    1.    0.    0.    1.    0.    0.
  0.    1.    0.    1.    0.    0.    1.    1.    0.5   1.    1.    0.    1.
  1.    0.    0.    1.    1.    1.    1.    1.    1.    0.    1.  ]
Out[4]:
(-0.1, 1.1)

In [7]:
for i in six.moves.range(num_models):
    ix = sorted_by_vloss[i]
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))


 6 | score 1.0 11th | vloss auc 0.00086  0th | selfqval 0.8513 14th
13 | score 1.0  3th | vloss auc 0.00088  1th | selfqval 0.9835  6th
10 | score 0.8 30th | vloss auc 0.00097  2th | selfqval 0.2057 26th
27 | score 1.0 10th | vloss auc 0.00101  3th | selfqval 0.3891 20th
35 | score 1.0 23th | vloss auc 0.00104  4th | selfqval 0.2640 24th
36 | score 1.0 29th | vloss auc 0.00104  5th | selfqval 0.0547 29th
 9 | score 1.0  7th | vloss auc 0.00105  6th | selfqval 0.1403 27th
 2 | score 1.0 25th | vloss auc 0.00108  7th | selfqval 0.9854  3th
16 | score 1.0 24th | vloss auc 0.00109  8th | selfqval 0.9775  8th
43 | score 1.0 22th | vloss auc 0.00110  9th | selfqval 0.9940  0th
17 | score 1.0  2th | vloss auc 0.00124 10th | selfqval 0.6717 16th
44 | score 1.0 21th | vloss auc 0.00125 11th | selfqval 0.9665 10th
15 | score 0.0 35th | vloss auc 0.00127 12th | selfqval 0.0004 38th
40 | score 0.0 47th | vloss auc 0.00128 13th | selfqval 0.0039 33th
26 | score 0.0 42th | vloss auc 0.00133 14th | selfqval 0.0005 37th
 0 | score 0.0 49th | vloss auc 0.00133 15th | selfqval 0.0081 31th
 8 | score 1.0  8th | vloss auc 0.00138 16th | selfqval 0.9864  1th
41 | score 0.0 48th | vloss auc 0.00142 17th | selfqval 0.0420 30th
11 | score 0.0 32th | vloss auc 0.00142 18th | selfqval 0.0062 32th
 7 | score 1.0  9th | vloss auc 0.00147 19th | selfqval 0.6576 17th
42 | score 1.0 16th | vloss auc 0.00150 20th | selfqval 0.9819  7th
22 | score 0.0 39th | vloss auc 0.00153 21th | selfqval 0.6226 18th
14 | score 1.0 14th | vloss auc 0.00154 22th | selfqval 0.9699  9th
39 | score 1.0 26th | vloss auc 0.00172 23th | selfqval 0.0000 40th
31 | score 0.0 45th | vloss auc 0.00179 24th | selfqval 0.0000 47th
18 | score 0.0 36th | vloss auc 0.00187 25th | selfqval 0.0039 34th
46 | score 1.0 19th | vloss auc 0.00189 26th | selfqval 0.7611 15th
32 | score 1.0 15th | vloss auc 0.00191 27th | selfqval 0.9860  2th
29 | score 1.0 12th | vloss auc 0.00193 28th | selfqval 0.3745 21th
 5 | score 0.0 33th | vloss auc 0.00201 29th | selfqval 0.0025 35th
34 | score 0.5 31th | vloss auc 0.00204 30th | selfqval 0.0000 42th
45 | score 1.0 20th | vloss auc 0.00208 31th | selfqval 0.8726 12th
49 | score 1.0  0th | vloss auc 0.00246 32th | selfqval 0.1345 28th
 4 | score 1.0 13th | vloss auc 0.00251 33th | selfqval 0.2419 25th
 3 | score 1.0 28th | vloss auc 0.00271 34th | selfqval 0.9850  4th
47 | score 1.0 18th | vloss auc 0.00275 35th | selfqval 0.3214 23th
38 | score 1.0 27th | vloss auc 0.00296 36th | selfqval 0.9841  5th
25 | score 0.0 41th | vloss auc 0.00303 37th | selfqval 0.0000 49th
37 | score 0.0 46th | vloss auc 0.00308 38th | selfqval 0.0000 41th
 1 | score 1.0 17th | vloss auc 0.00315 39th | selfqval 0.0003 39th
33 | score 1.0  1th | vloss auc 0.00331 40th | selfqval 0.8604 13th
24 | score 0.0 34th | vloss auc 0.00332 41th | selfqval 0.0000 44th
20 | score 1.0  5th | vloss auc 0.00341 42th | selfqval 0.6057 19th
48 | score 0.0 40th | vloss auc 0.00345 43th | selfqval 0.0007 36th
28 | score 0.0 43th | vloss auc 0.00379 44th | selfqval 0.0000 46th
30 | score 0.0 44th | vloss auc 0.00386 45th | selfqval 0.0000 43th
21 | score 0.0 38th | vloss auc 0.00391 46th | selfqval 0.0000 45th
19 | score 0.0 37th | vloss auc 0.00498 47th | selfqval 0.0000 48th
12 | score 1.0  4th | vloss auc 0.00615 48th | selfqval 0.3551 22th
23 | score 1.0  6th | vloss auc 0.02008 49th | selfqval 0.9105 11th

Training Length 7 Noise 0.01 Learning Rate 0.001


In [4]:
'''
Analyzing results of student2 with 5 skills, with training length 7 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/stats-runlr001A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr001A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr001A.npz')
#data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l7-random.pickle/msloss-runlr01A.npz')

vloss = data11['vloss'][:,-3:]
vloss_auc = np.sum(vloss,axis=1)
tloss = data11['tloss'][:,-3:]
tloss_auc = np.sum(tloss,axis=1)
scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]
#msloss = data41['msloss'][:,0,:]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_tloss = np.argsort(tloss_auc)
ranked_by_tloss = ixs2ranks(sorted_by_tloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

#sorted_by_msloss = (np.argsort(msloss[:,0]))
#ranked_by_msloss = ixs2ranks(sorted_by_msloss)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
#six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)

num_models = scores.shape[0]

# 40/50 are good models + 1 almost good
graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.002)
ylim(-0.1,1.1)
graph_qval_score(selfqvals, scores)
#xlim(0.95,1.0)
ylim(-0.1,1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
#graph_msloss_score(msloss, scores, postfix=None)
#xlim(0,0.00002)
#ylim(-0.1,1.1)
#figure()
#plot(ranked_by_tloss,ranked_by_msloss,'.')


vloss shape (50, 3)
scores shape (50,)
[ 1.     0.125  1.     1.     1.     1.     0.     1.     1.     1.     1.
  0.     0.375  1.     1.     0.     1.     1.     1.     1.     1.     1.
  1.     1.     0.     1.     1.     1.     1.     1.     1.     1.     1.
  1.     1.     0.625  1.     1.     0.375  1.     1.     0.     0.875  1.
  1.     1.     1.     1.     1.     1.   ]
Out[4]:
(-0.1, 1.1)

In [6]:
for i in six.moves.range(num_models):
    ix = sorted_by_qval[i]
    #six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | vloss auc {:.5f} {:2d}th | msloss {:.8f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
    #    ix,scores[ix], ranked_by_score[ix], tloss_auc[ix], ranked_by_tloss[ix], vloss_auc[ix], ranked_by_vloss[ix], msloss[ix,0], ranked_by_msloss[ix], selfqvals[ix], ranked_by_qval[ix]))
    six.print_('{:2d} | score {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | vloss auc {:.5f} {:2d}th | selfqval {:.4f} {:2d}th'.format(
        ix,scores[ix], ranked_by_score[ix], tloss_auc[ix], ranked_by_tloss[ix], vloss_auc[ix], ranked_by_vloss[ix], selfqvals[ix], ranked_by_qval[ix]))


46 | score 1.0 26th | vloss auc 0.00004 30th | vloss auc 0.00000 39th | selfqval 0.9603  0th
36 | score 1.0 33th | vloss auc 0.00007 36th | vloss auc 0.00000 46th | selfqval 0.9594  1th
22 | score 1.0  4th | vloss auc 0.00003 21th | vloss auc 0.00000 45th | selfqval 0.9573  2th
32 | score 1.0 38th | vloss auc 0.00007 37th | vloss auc 0.00000  1th | selfqval 0.9558  3th
48 | score 1.0  2th | vloss auc 0.00009 43th | vloss auc 0.00000 17th | selfqval 0.9514  4th
33 | score 1.0 34th | vloss auc 0.00000  0th | vloss auc 0.00000 18th | selfqval 0.9373  5th
 9 | score 1.0 14th | vloss auc 0.00001  9th | vloss auc 0.00000 34th | selfqval 0.9293  6th
16 | score 1.0 10th | vloss auc 0.00001  8th | vloss auc 0.00000 11th | selfqval 0.9258  7th
28 | score 1.0 31th | vloss auc 0.00009 42th | vloss auc 0.00000 42th | selfqval 0.9229  8th
43 | score 1.0 29th | vloss auc 0.00002 11th | vloss auc 0.00000 27th | selfqval 0.9218  9th
23 | score 1.0  3th | vloss auc 0.00013 48th | vloss auc 0.00000 13th | selfqval 0.9106 10th
30 | score 1.0 36th | vloss auc 0.00006 34th | vloss auc 0.00000  5th | selfqval 0.9096 11th
24 | score 0.0 49th | vloss auc 0.00001  4th | vloss auc 0.00000 14th | selfqval 0.9081 12th
 8 | score 1.0 15th | vloss auc 0.00001  6th | vloss auc 0.00000 40th | selfqval 0.9019 13th
19 | score 1.0  7th | vloss auc 0.00003 18th | vloss auc 0.00000  7th | selfqval 0.8882 14th
31 | score 1.0 37th | vloss auc 0.00000  2th | vloss auc 0.00000  9th | selfqval 0.8831 15th
 4 | score 1.0 18th | vloss auc 0.00004 31th | vloss auc 0.00000 10th | selfqval 0.8814 16th
18 | score 1.0  8th | vloss auc 0.00003 17th | vloss auc 0.00000 22th | selfqval 0.8790 17th
37 | score 1.0 24th | vloss auc 0.00002 14th | vloss auc 0.00000 41th | selfqval 0.8753 18th
 3 | score 1.0 19th | vloss auc 0.00003 27th | vloss auc 0.00000 25th | selfqval 0.8733 19th
13 | score 1.0 12th | vloss auc 0.00001  3th | vloss auc 0.00000  6th | selfqval 0.8674 20th
27 | score 1.0 23th | vloss auc 0.00001  5th | vloss auc 0.00000 24th | selfqval 0.8671 21th
17 | score 1.0  9th | vloss auc 0.00010 45th | vloss auc 0.00000 48th | selfqval 0.8656 22th
47 | score 1.0 25th | vloss auc 0.00003 20th | vloss auc 0.00000  3th | selfqval 0.8616 23th
 7 | score 1.0 16th | vloss auc 0.00003 23th | vloss auc 0.00000 28th | selfqval 0.8560 24th
 2 | score 1.0 20th | vloss auc 0.00004 32th | vloss auc 0.00000 37th | selfqval 0.8521 25th
15 | score 0.0 45th | vloss auc 0.00008 40th | vloss auc 0.00000 43th | selfqval 0.8504 26th
49 | score 1.0  0th | vloss auc 0.00009 44th | vloss auc 0.00000 16th | selfqval 0.8469 27th
 0 | score 1.0 22th | vloss auc 0.00007 38th | vloss auc 0.00000  8th | selfqval 0.8161 28th
11 | score 0.0 48th | vloss auc 0.00006 35th | vloss auc 0.00000  2th | selfqval 0.8043 29th
14 | score 1.0 11th | vloss auc 0.00000  1th | vloss auc 0.00000 12th | selfqval 0.7405 30th
40 | score 1.0 30th | vloss auc 0.00003 24th | vloss auc 0.00000 32th | selfqval 0.7240 31th
35 | score 0.6 41th | vloss auc 0.00003 28th | vloss auc 0.00000 21th | selfqval 0.6801 32th
29 | score 1.0 35th | vloss auc 0.00014 49th | vloss auc 0.00000 20th | selfqval 0.5740 33th
45 | score 1.0 27th | vloss auc 0.00002 12th | vloss auc 0.00000 47th | selfqval 0.5513 34th
 5 | score 1.0 17th | vloss auc 0.00003 26th | vloss auc 0.00000 36th | selfqval 0.5263 35th
44 | score 1.0 28th | vloss auc 0.00001  7th | vloss auc 0.00000 30th | selfqval 0.4235 36th
26 | score 1.0  1th | vloss auc 0.00009 41th | vloss auc 0.00000 35th | selfqval 0.2975 37th
10 | score 1.0 13th | vloss auc 0.00010 46th | vloss auc 0.00000 31th | selfqval 0.2951 38th
39 | score 1.0 32th | vloss auc 0.00002 15th | vloss auc 0.00000 15th | selfqval 0.2915 39th
42 | score 0.9 40th | vloss auc 0.00003 25th | vloss auc 0.00000 38th | selfqval 0.2111 40th
25 | score 1.0 21th | vloss auc 0.00006 33th | vloss auc 0.00001 49th | selfqval 0.1801 41th
12 | score 0.4 42th | vloss auc 0.00003 19th | vloss auc 0.00000  4th | selfqval 0.1279 42th
21 | score 1.0  5th | vloss auc 0.00002 13th | vloss auc 0.00000 33th | selfqval 0.1277 43th
41 | score 0.0 46th | vloss auc 0.00002 10th | vloss auc 0.00000 44th | selfqval 0.0662 44th
20 | score 1.0  6th | vloss auc 0.00003 22th | vloss auc 0.00000 29th | selfqval 0.0453 45th
 1 | score 0.1 44th | vloss auc 0.00004 29th | vloss auc 0.00000 19th | selfqval 0.0428 46th
 6 | score 0.0 47th | vloss auc 0.00002 16th | vloss auc 0.00000 23th | selfqval 0.0241 47th
38 | score 0.4 43th | vloss auc 0.00012 47th | vloss auc 0.00000  0th | selfqval 0.0050 48th
34 | score 1.0 39th | vloss auc 0.00008 39th | vloss auc 0.00000 26th | selfqval 0.0028 49th

Training Length 6 Noise 0.01 Learning Rate 0.001


In [8]:
'''
Analyzing results of student2 with 5 skills, with training length 6 and testing length 8.
Single GRU
Looking at correlation between training AUC and actual performance
'''
data11 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/stats-runlr001A.npz')
data21 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real1-runlr001A.npz')
data31 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/mcts-rtype2-rollouts20000-trajectories8-real0-runlr001A.npz')
#data41 = np.load('experiments/test2w5_modelgrusimple_mid-noise0.01-dropout10-shuffle1-data-test2a-w5-n100000-l6-random.pickle/msloss-runlr01A.npz')

vloss = data11['vloss'][:,-3:]
vloss_auc = np.sum(vloss,axis=1)
tloss = data11['tloss'][:,-3:]
tloss_auc = np.sum(tloss,axis=1)
scores = np.vstack((data21['scores'],))[:,0]
realqvals = np.vstack((data21['qvals'],))[:,0]
selfqvals = np.vstack((data31['qvals'],))[:,0]
#msloss = data41['msloss'][:,0,:]

# ixs sorted by various things
sorted_by_score = np.flipud(np.argsort(scores))
ranked_by_score = ixs2ranks(sorted_by_score)

sorted_by_vloss = np.argsort(vloss_auc)
ranked_by_vloss = ixs2ranks(sorted_by_vloss)

sorted_by_tloss = np.argsort(tloss_auc)
ranked_by_tloss = ixs2ranks(sorted_by_tloss)

sorted_by_qval = np.flipud(np.argsort(selfqvals))
ranked_by_qval = ixs2ranks(sorted_by_qval)

#sorted_by_msloss = (np.argsort(msloss[:,0]))
#ranked_by_msloss = ixs2ranks(sorted_by_msloss)

six.print_('vloss shape {}'.format(vloss.shape))
six.print_('scores shape {}'.format(scores.shape))
#six.print_('msloss shape {}'.format(msloss.shape))
six.print_(scores)

num_models = scores.shape[0]

# 2/50 are good
graph_trainauc_score(vloss, scores, postfix=None)
#xlim(0,0.002)
ylim(-0.1,1.1)
graph_qval_score(selfqvals, scores)
#xlim(0.95,1.0)
ylim(-0.1,1.1)
#graph_trainauc_qval(vloss,selfqvals)
#ylim(-0.1,1.1)
#graph_msloss_score(msloss, scores, postfix=None)
#xlim(0,0.00002)
#ylim(-0.1,1.1)
#figure()
#plot(ranked_by_tloss,ranked_by_msloss,'.')


vloss shape (50, 3)
scores shape (50,)
[ 0.     0.     0.     0.     1.     1.     0.     0.     0.     0.     0.
  0.     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
  0.     0.     0.     0.125  0.     0.     0.     0.     0.     0.     0.
  0.     0.     0.     0.     0.125  0.     0.     0.     0.25   0.     0.
  0.     0.     0.     0.     0.     0.   ]
Out[8]:
(-0.1, 1.1)

In [ ]: