In [1]:
# August 4 2017
# This file is for experiments with student2 4 skills and training trajectory length 7
# to check different architectures and whether they can learn good models
# This uses student2a where transitions happen after observations
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *
def graph_trainauc_score(vloss, scores):
    """Scatter-plot training-loss AUC against actual posttest performance.

    vloss:  per-run validation losses; assumed 2-D (runs x epochs) -- TODO confirm.
            Each row is summed to give that run's "training loss AUC".
    scores: per-run posttest scores, one value per row of vloss.
    """
    train_auc = np.sum(vloss, axis=1)
    posttest = scores
    title('Training Loss AUC versus Actual Performance')
    xlabel('Training Loss AUC')
    ylabel('Posttest Score')
    plot(train_auc, posttest, '.')
    ylim(0, 1)
In [8]:
'''
Default Double LSTM
Trying to find a training epoch to stop training.
'''
data11 = np.load('experiments/test2_model_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
six.print_(vloss.shape)
figure()
# Mean validation loss per epoch across runs, with an approximate 95% band.
x_avg = np.mean(vloss, axis=0)
# Standard error of the mean is std/sqrt(n), not std/n; 1.98 ~ z for a 95% CI.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged loss level: average of the last two epochs.
x_end = np.mean(x_avg[-2:])
x_thres = 0.00083
six.print_('last {} threshold {}'.format(x_end, x_thres))
# Horizontal reference lines: converged level and the stopping threshold.
plot([0,20],[x_end,x_end],color='#ff0000')
plot([0,20],[x_thres,x_thres],color='#ff0000')
xlim(10, 15)
ylim(0.0008,0.0009)
# looks like epoch 11 is good for threshold 0.00083
Out[8]:
In [12]:
'''
Single LSTM
Trying to find a training epoch to stop training.
'''
data11 = np.load('experiments/test2_modelsimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
six.print_(vloss.shape)
figure()
# Mean validation loss per epoch across runs, with an approximate 95% band.
x_avg = np.mean(vloss, axis=0)
# Standard error of the mean is std/sqrt(n), not std/n; 1.98 ~ z for a 95% CI.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged loss level: average of the last two epochs.
x_end = np.mean(x_avg[-2:])
x_thres = 0.00001
six.print_('last {} threshold {}'.format(x_end, x_thres))
# Horizontal reference lines: converged level and the stopping threshold.
plot([0,20],[x_end,x_end],color='#ff0000')
plot([0,20],[x_thres,x_thres],color='#ff0000')
xlim(10, 15)
ylim(0.0000,0.00002)
# looks like epoch 11 is good for 0.00001
Out[12]:
In [14]:
'''
Single GRU
Trying to find a training epoch to stop training.
'''
data11 = np.load('experiments/test2_modelgrusimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
six.print_(vloss.shape)
figure()
# Mean validation loss per epoch across runs, with an approximate 95% band.
x_avg = np.mean(vloss, axis=0)
# Standard error of the mean is std/sqrt(n), not std/n; 1.98 ~ z for a 95% CI.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged loss level: average of the last two epochs.
x_end = np.mean(x_avg[-2:])
x_thres = 0.00001
six.print_('last {} threshold {}'.format(x_end, x_thres))
# Horizontal reference lines: converged level and the stopping threshold.
plot([0,20],[x_end,x_end],color='#ff0000')
plot([0,20],[x_thres,x_thres],color='#ff0000')
xlim(5, 10)
ylim(0.0000,0.00002)
# looks like epoch 7 is good for 0.00001
Out[14]:
In [6]:
'''
Analyzing results of student2 with 4 skills, with training length 7 and testing length 6.
Double LSTM
Looking at correlation between training AUC and actual performance
'''
# Load the training statistics and the MCTS evaluation for the same set of runs.
stats_data = np.load('experiments/test2_model_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
mcts_data = np.load('experiments/test2_model_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/mcts-rtype2-rollouts3000-trajectories100-real1-runA.npz')
train_vloss = stats_data['vloss']
posttest_scores = mcts_data['scores'][:,0]
posttest_qvals = mcts_data['qvals'][:,0]
six.print_('vloss shape {}'.format(train_vloss.shape))
#six.print_(train_vloss)
six.print_('scores shape {}'.format(posttest_scores.shape))
six.print_(posttest_scores)
six.print_(posttest_qvals)
graph_trainauc_score(train_vloss, posttest_scores)
In [7]:
'''
Analyzing results of student2 with 4 skills, with training length 7 and testing length 6.
Single LSTM
Looking at correlation between training AUC and actual performance
'''
# Load the training statistics and the MCTS evaluation for the same set of runs.
stats_data = np.load('experiments/test2_modelsimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
mcts_data = np.load('experiments/test2_modelsimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/mcts-rtype2-rollouts3000-trajectories100-real1-runA.npz')
train_vloss = stats_data['vloss']
posttest_scores = mcts_data['scores'][:,0]
posttest_qvals = mcts_data['qvals'][:,0]
six.print_('vloss shape {}'.format(train_vloss.shape))
#six.print_(train_vloss)
six.print_('scores shape {}'.format(posttest_scores.shape))
six.print_(posttest_scores)
six.print_(posttest_qvals)
graph_trainauc_score(train_vloss, posttest_scores)
In [8]:
'''
Analyzing results of student2 with 4 skills, with training length 7 and testing length 6.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Load the training statistics and the MCTS evaluation for the same set of runs.
stats_data = np.load('experiments/test2_modelgrusimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
mcts_data = np.load('experiments/test2_modelgrusimple_mid-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/mcts-rtype2-rollouts3000-trajectories100-real1-runA.npz')
train_vloss = stats_data['vloss']
posttest_scores = mcts_data['scores'][:,0]
posttest_qvals = mcts_data['qvals'][:,0]
six.print_('vloss shape {}'.format(train_vloss.shape))
#six.print_(train_vloss)
six.print_('scores shape {}'.format(posttest_scores.shape))
six.print_(posttest_scores)
six.print_(posttest_qvals)
graph_trainauc_score(train_vloss, posttest_scores)
In [10]:
'''
Single LSTM
Trying to find a training epoch to stop training.
'''
data11 = np.load('experiments/test2_modelsimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
six.print_(vloss.shape)
figure()
# Mean validation loss per epoch across runs, with an approximate 95% band.
x_avg = np.mean(vloss, axis=0)
# Standard error of the mean is std/sqrt(n), not std/n; 1.98 ~ z for a 95% CI.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged loss level: average of the last two epochs.
x_end = np.mean(x_avg[-2:])
x_thres = 0.00002
six.print_('last {} threshold {}'.format(x_end, x_thres))
# Horizontal reference lines: converged level and the stopping threshold.
plot([0,20],[x_end,x_end],color='#ff0000')
plot([0,20],[x_thres,x_thres],color='#ff0000')
#xlim(10, 15)
ylim(0.0000,0.00003)
# looks like 20 epochs is not quite enough for 0.00001, but epoch 20 should be just barely there
Out[10]:
In [15]:
'''
Single GRU
Trying to find a training epoch to stop training.
'''
data11 = np.load('experiments/test2_modelgrusimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runA.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
six.print_(vloss.shape)
figure()
# Mean validation loss per epoch across runs, with an approximate 95% band.
x_avg = np.mean(vloss, axis=0)
# Standard error of the mean is std/sqrt(n), not std/n; 1.98 ~ z for a 95% CI.
x_serr = np.std(vloss, axis=0) / np.sqrt(vloss.shape[0]) * 1.98
plot(x_avg, color='#0000ff')
plot(x_avg + x_serr, color='#ddddff')
plot(x_avg - x_serr, color='#ddddff')
# Converged loss level: average of the last two epochs.
x_end = np.mean(x_avg[-2:])
x_thres = 0.00001
six.print_('last {} threshold {}'.format(x_end, x_thres))
# Horizontal reference lines: converged level and the stopping threshold.
plot([0,20],[x_end,x_end],color='#ff0000')
plot([0,20],[x_thres,x_thres],color='#ff0000')
xlim(10, 15)
ylim(0.0000,0.00002)
# looks like 14 is enough for 0.00001
Out[15]:
In [2]:
'''
Analyzing results of student2 with 4 skills, with training length 7 and testing length 6.
Single LSTM
Looking at correlation between training AUC and actual performance
'''
# Load the training statistics and the MCTS evaluation for the same set of runs.
stats_data = np.load('experiments/test2_modelsimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runB.npz')
mcts_data = np.load('experiments/test2_modelsimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/mcts-rtype2-rollouts3000-trajectories100-real1-runB.npz')
train_vloss = stats_data['vloss']
posttest_scores = mcts_data['scores'][:,0]
posttest_qvals = mcts_data['qvals'][:,0]
six.print_('vloss shape {}'.format(train_vloss.shape))
#six.print_(train_vloss)
six.print_('scores shape {}'.format(posttest_scores.shape))
six.print_(posttest_scores)
six.print_(posttest_qvals)
graph_trainauc_score(train_vloss, posttest_scores)
In [3]:
'''
Analyzing results of student2 with 4 skills, with training length 7 and testing length 6.
Single GRU
Looking at correlation between training AUC and actual performance
'''
# Load the training statistics and the MCTS evaluation for the same set of runs.
# (Header typo fixed: "GRUE" -> "GRU", matching the other GRU cells.)
data11 = np.load('experiments/test2_modelgrusimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/stats-runB.npz')
data21 = np.load('experiments/test2_modelgrusimple_small-dropout10-shuffle0-data-test2a-n100000-l7-random.pickle/mcts-rtype2-rollouts3000-trajectories100-real1-runB.npz')
vloss = data11['vloss']  # validation loss; assumed (runs, epochs) -- TODO confirm
scores = data21['scores'][:,0]  # posttest score per run
qvals = data21['qvals'][:,0]    # MCTS q-value per run
six.print_('vloss shape {}'.format(vloss.shape))
#six.print_(vloss)
six.print_('scores shape {}'.format(scores.shape))
six.print_(scores)
six.print_(qvals)
graph_trainauc_score(vloss, scores)
In [ ]: