In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *
In [10]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2.
Trying to find an epoch to stop training when the loss has converged enough.
'''
# Per-run validation loss curves; vloss is presumably (runs, epochs) -- axis 0 is
# averaged below and the result is plotted as a curve over epochs.
data11 = np.load('experiments/test2_model2_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
vloss = data11['vloss']
six.print_(vloss.shape)
figure()
# Mean validation loss across runs with an approximate 95% confidence band.
x_avg = np.mean(vloss, axis=0)
# BUG FIX: the standard error of the mean is std / sqrt(n), not std / n.
# 1.98 is the ~95% t critical value for this sample size.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged-loss estimate: mean of the last two epochs, compared against a
# hand-picked threshold for "converged to 3 significant digits".
x_end = np.mean(x_avg[-2:])
x_thres = 0.6369
six.print_('last {} threshold {}'.format(x_end, x_thres))
plot([0,60],[x_end,x_end],color='#ff0000')
plot([0,60],[x_thres,x_thres],color='#ff0000')
#xlim(30,40)
ylim(0.63,0.64)
# looks like epoch 40 for 3 significant digits
Out[10]:
In [17]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2. Simpler model.
Trying to find an epoch to stop training when the loss has converged enough.
'''
# Per-run validation loss curves for the simpler model; vloss is presumably
# (runs, epochs) -- axis 0 is averaged below and plotted over epochs.
data11 = np.load('experiments/test2_model2simple_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
vloss = data11['vloss']
six.print_(vloss.shape)
figure()
# Mean validation loss across runs with an approximate 95% confidence band.
x_avg = np.mean(vloss, axis=0)
# BUG FIX: the standard error of the mean is std / sqrt(n), not std / n.
# 1.98 is the ~95% t critical value for this sample size.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged-loss estimate: mean of the last two epochs, compared against a
# hand-picked threshold for "converged to 3 significant digits".
x_end = np.mean(x_avg[-2:])
x_thres = 0.6219
six.print_('last {} threshold {}'.format(x_end, x_thres))
plot([0,60],[x_end,x_end],color='#ff0000')
plot([0,60],[x_thres,x_thres],color='#ff0000')
#xlim(30,40)
ylim(0.62,0.63)
# looks like epoch 40 for 3 significant digits
Out[17]:
In [18]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2. GRU model.
Trying to find an epoch to stop training when the loss has converged enough.
'''
# NOTE: the original cell text said "Simpler model", but this loads model2gru;
# corrected the description to match the data actually analyzed.
# Per-run validation loss curves; vloss is presumably (runs, epochs).
data11 = np.load('experiments/test2_model2gru_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
vloss = data11['vloss']
six.print_(vloss.shape)
figure()
# Mean validation loss across runs with an approximate 95% confidence band.
x_avg = np.mean(vloss, axis=0)
# BUG FIX: the standard error of the mean is std / sqrt(n), not std / n.
# 1.98 is the ~95% t critical value for this sample size.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged-loss estimate: mean of the last two epochs, compared against a
# hand-picked threshold for "converged to 3 significant digits".
x_end = np.mean(x_avg[-2:])
x_thres = 0.6219
six.print_('last {} threshold {}'.format(x_end, x_thres))
plot([0,60],[x_end,x_end],color='#ff0000')
plot([0,60],[x_thres,x_thres],color='#ff0000')
#xlim(30,40)
ylim(0.62,0.63)
# looks like epoch 30 for 3 significant digits, but might as well also do 40 like the other two
Out[18]:
In [2]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2.
Looking at how many good models there are and the correlation between good and training loss.
'''
# Training stats (loss curves) and MCTS evaluation results for the same runs.
data11 = np.load('experiments/test2_model2_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
data21 = np.load('experiments/test2_model2_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/mcts-rtype2-rollouts1000-trajectories100-real1-runBinCE-A.npz')
vloss = data11['vloss']
scores = data21['scores'][:,0]
# Dump shapes and full contents for manual inspection.
six.print_('vloss shape {}'.format(vloss.shape))
six.print_(vloss)
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
# Summarize each run's whole loss curve by its area (sum over epochs),
# then scatter it against the run's posttest score.
loss_auc = vloss.sum(axis=1)
title('Training Loss AUC versus Actual Performance')
xlabel('Training Loss AUC')
ylabel('Posttest Score')
plot(loss_auc, scores, '.')
Out[2]:
In [3]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2. Simple model.
Looking at how many good models there are and the correlation between good and training loss.
'''
# Training stats (loss curves) and MCTS evaluation results for the same runs.
data11 = np.load('experiments/test2_model2simple_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
data21 = np.load('experiments/test2_model2simple_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/mcts-rtype2-rollouts1000-trajectories100-real1-runBinCE-A.npz')
vloss = data11['vloss']
scores = data21['scores'][:,0]
# Dump shapes and full contents for manual inspection.
six.print_('vloss shape {}'.format(vloss.shape))
six.print_(vloss)
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
# Summarize each run's whole loss curve by its area (sum over epochs),
# then scatter it against the run's posttest score.
loss_auc = vloss.sum(axis=1)
title('Training Loss AUC versus Actual Performance')
xlabel('Training Loss AUC')
ylabel('Posttest Score')
plot(loss_auc, scores, '.')
Out[3]:
In [4]:
'''
Analyzing results of student2 with 2 skills, with training length 2 and testing length 2. GRU
Looking at how many good models there are and the correlation between good and training loss.
'''
# Training stats (loss curves) and MCTS evaluation results for the same runs.
data11 = np.load('experiments/test2_model2gru_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/stats-runBinCE-A.npz')
data21 = np.load('experiments/test2_model2gru_tiny-dropout10-shuffle0-data-test2-n10000-l2-random.pickle/mcts-rtype2-rollouts1000-trajectories100-real1-runBinCE-A.npz')
vloss = data11['vloss']
scores = data21['scores'][:,0]
# Dump shapes and full contents for manual inspection.
six.print_('vloss shape {}'.format(vloss.shape))
six.print_(vloss)
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
# Summarize each run's whole loss curve by its area (sum over epochs),
# then scatter it against the run's posttest score.
loss_auc = vloss.sum(axis=1)
title('Training Loss AUC versus Actual Performance')
xlabel('Training Loss AUC')
ylabel('Posttest Score')
plot(loss_auc, scores, '.')
Out[4]:
In [ ]: