In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
%matplotlib inline
import six
import dynamics_model_class as dmc
import mcts_tests as mc
import mcts
import model_training as mtrain
import forwardsearch as fsearch
import numpy as np
import scipy as sp
from matplotlib.pyplot import *
import dataset_utils
import tensorflow as tf
import tflearn
import time
import copy
from concept_dependency_graph import ConceptDependencyGraph
import data_generator as dg
from student import *
import simple_mdp as sm
In [3]:
n_concepts = 4
use_student2 = True
transition_after = True
student2_str = ('2' if use_student2 else '') + ('a' if use_student2 and transition_after else '')
learn_prob = 0.5
lp_str = '-lp{}'.format(int(learn_prob*100)) if not use_student2 else ''
n_students = 100000
seqlen = 5
filter_mastery = False
filter_str = '' if not filter_mastery else '-filtered'
policy = 'random'
epsilon = 0.3
epsilon_str = '{:.2f}'.format(epsilon) if policy == 'egreedy' else ''
filename = 'test{}-w{}-n{}-l{}{}-{}{}{}.pickle'.format(student2_str, n_concepts, n_students, seqlen,
                                                       lp_str, policy, epsilon_str, filter_str)
#concept_tree = sm.create_custom_dependency()
concept_tree = ConceptDependencyGraph()
concept_tree.init_default_tree(n_concepts)
if not use_student2:
    test_student = Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
else:
    test_student = Student2(n_concepts, transition_after=transition_after)
six.print_(filename)
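# With the settings above (use_student2=True, transition_after=True), this
# prints: test2a-w4-n100000-l5-random.pickle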
In [4]:
# Generates the data
# Only run this cell if you need to generate new data; otherwise skip it
if False:
    print("Initializing synthetic data sets...")
    dg.generate_data(concept_tree, student=test_student, n_students=n_students, filter_mastery=filter_mastery, seqlen=seqlen, policy=policy, epsilon=epsilon, filename="{}{}".format(dg.SYN_DATA_DIR, filename))
    print("Data generation completed.")
In [5]:
# load toy data
data = dataset_utils.load_data(filename='{}{}'.format(dg.SYN_DATA_DIR, filename))
In [6]:
print('Average posttest: {}'.format(sm.expected_reward(data)))
print('Average sparse reward: {}'.format(sm.expected_sparse_reward(data)))
print('Percent of full posttest score: {}'.format(sm.percent_complete(data)))
print('Percent of all seen: {}'.format(sm.percent_all_seen(data)))
for t in data[0]:
    six.print_(t)
In [7]:
input_data_, output_mask_, target_data_ = dataset_utils.preprocess_data_for_rnn(data)
six.print_(input_data_.shape)
six.print_(output_mask_.shape)
six.print_(target_data_.shape)
six.print_(output_mask_[0,:,:])
six.print_(target_data_[0,:,:])
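# A minimal sketch of the assumed observation encoding (inferred from the
# probe loop further below, which appends a one-hot vector of length
# 2*n_concepts): each timestep encodes an (exercise, correctness) pair as a
# single one-hot index. The exact index convention inside
# preprocess_data_for_rnn may differ.
def encode_observation_sketch(concept, correct):
    vec = np.zeros((2 * n_concepts,))
    vec[concept + (n_concepts if correct else 0)] = 1.0
    return vec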
In [12]:
# test_model hidden=16
# test_model_mid hidden=10
# test_model_small hidden=5
# test_model_tiny hidden=3
model_id = "test2_modelgrusimple_large"
dmodel = dmc.DynamicsModel(model_id=model_id, timesteps=seqlen-1, output_dropout=0.5, load_checkpoint=False)
#dmodel2 = dmc.DynamicsModel(model_id=model_id, timesteps=seqlen-1, load_checkpoint=False)
In [13]:
# extract out the training states
class ExtractCallback(tflearn.callbacks.Callback):
    def __init__(self):
        self.tstates = []
    def on_epoch_begin(self, ts):
        self.tstates.append([])
    def on_batch_end(self, ts, snapshot):
        self.tstates[-1].append(copy.copy(ts))
ecall = ExtractCallback()
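# Each stored snapshot is a tflearn TrainingState (its global_loss and
# val_loss fields are read in the loss-plot cell below); copy.copy is needed
# because tflearn updates a single TrainingState object in place.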
In [14]:
num_epoch_batch = 50
num_epoch_per_batch = 1
for eb in six.moves.range(num_epoch_batch):
    # add noise to each batch
    processed_input_data = input_data_ + 0.05*np.random.randn(*input_data_.shape)
    train_data = (processed_input_data[:,:,:], output_mask_[:,:,:], target_data_[:,:,:])
    dmodel.train(train_data,
                 n_epoch=num_epoch_per_batch,
                 callbacks=ecall,
                 load_checkpoint=False,
                 shuffle=True,
                 validation_set=0.1,
                 batch_size=None)
    #dmodel2.train(train_data,
    #              n_epoch=num_epoch_per_batch,
    #              callbacks=ecall,
    #              load_checkpoint=False,
    #              shuffle=True,
    #              validation_set=0.1,
    #              batch_size=None)
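# The fresh Gaussian noise (sigma = 0.05) drawn before every one-epoch
# train() call acts as simple input-jitter regularization: the network never
# sees the exact same inputs twice across the 50 passes.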
In [15]:
# binary crossentropy doesn't work
train_loss = np.array([np.mean([ts.global_loss for ts in batch]) for batch in ecall.tstates])
val_loss = np.array([batch[-1].val_loss if batch[-1].val_loss is not None else 0.0 for batch in ecall.tstates])
figure()
plot(train_loss)
plot(val_loss)
last_val_loss = np.mean(val_loss[-2:])
val_loss_thres = last_val_loss + 0.000001
last_train_loss = np.mean(train_loss[-2:])
#xlim(40,50)
six.print_('Train Loss Limit: {:.8f}'.format(last_train_loss))
six.print_('Val Loss Limit: {:.8f} Thresh: {:.8f}'.format(last_val_loss, val_loss_thres))
plot([0,train_loss.shape[0]], [last_val_loss,last_val_loss], color='#ff0000')
plot([0,train_loss.shape[0]], [val_loss_thres,val_loss_thres], color='#ff0000')
#ylim(last_val_loss - 0.00001, last_val_loss + 0.00002)
ylim(0,0.0002)
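# The two horizontal red lines mark the final validation loss and a +1e-6
# threshold above it, as a visual reference for convergence.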
In [12]:
# predict a bit
dataix = 4
test_input_data = input_data_[dataix:dataix+1,:,:]
six.print_(test_input_data)
six.print_(data[dataix][-1])
prediction = dmodel.predict(test_input_data)
six.print_(prediction)
# add one observation of each type and see what the predictions are
for nexta in six.moves.range(n_concepts*2):
    obvec = np.zeros((n_concepts*2,))
    obvec[nexta] = 1.0
    #six.print_(test_input_data[0,:,:].shape)
    #six.print_(obvec[np.newaxis,:].shape)
    test_input_data2 = np.vstack((test_input_data[0,:,:],obvec[np.newaxis,:]))
    six.print_('Next observation: {}'.format(obvec))
    prediction = dmodel.predict(test_input_data2[np.newaxis,:,:])
    #prediction2 = dmodel2.predict(test_input_data2[np.newaxis,:,:])
    six.print_('Next prediction: ' + ' '.join('{:.4f}'.format(x) for x in prediction[0,seqlen-2,:]))
    #six.print_('Next prediction2: ' + ' '.join('{:.4f}'.format(x) for x in prediction2[0,seqlen-2,:]))
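# A possible follow-up to the probe loop above (a sketch, not run here):
# greedily pick the next exercise whose hypothetical "correct" observation
# maximizes mean predicted mastery. greedy_next_action is hypothetical and
# assumes the same one-hot index convention as obvec above.
def greedy_next_action(model, history):
    best_c, best_score = None, -np.inf
    for c in six.moves.range(n_concepts):
        obvec = np.zeros((n_concepts*2,))
        obvec[c] = 1.0  # assumed index for a correct answer on concept c
        probe = np.vstack((history, obvec[np.newaxis,:]))
        pred = model.predict(probe[np.newaxis,:,:])
        score = np.mean(pred[0,-1,:])
        if score > best_score:
            best_c, best_score = c, score
    return best_c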
In [13]:
# save the model to a checkpoint file
chkpt = 'tempmodel'
dmodel.save(chkpt)
#chkpt2 = 'tempmodel2'
#dmodel2.save(chkpt2)
In [17]:
# test memoization
mem_chkpt = 'mem_tempmodel.npz'
starttime = time.time()
mtrain.dkt_memoize_single(n_concepts, model_id, chkpt, 6, mem_chkpt)
endtime = time.time()
six.print_('Time Elapsed {}'.format(endtime - starttime))
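# dkt_memoize_single presumably precomputes DKT outputs for every observation
# history up to depth 6, so the MCTS rollouts below can use table lookups
# instead of RNN calls. Assuming one entry per history over the
# 2*n_concepts = 8 possible observations, that is
# sum_{t=0..6} 8^t = 299,593 histories.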
In [18]:
# test the model on the real environment
test_horizon = 6
n_rollouts = 3000
n_trajectories = 8
r_type = mcts.SPARSE
test_student = Student2(n_concepts, transition_after=transition_after)
test_student.reset()
test_student.knowledge[0] = 1 # initialize the first concept to be known
sim = StudentExactSim(test_student.copy(), concept_tree)
starttime = time.time()
acc, bestq = mc.test_dkt_chunk(
    n_trajectories, concept_tree, sim, model_id, [mem_chkpt],
    test_horizon, n_rollouts, r_type, use_real=True, use_mem=True)
endtime = time.time()
six.print_('Time Elapsed {}'.format(endtime - starttime))
six.print_('Acc: {} Best Q: {}'.format(acc, bestq))
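# acc is presumably the average final score over the 8 real-environment
# trajectories and bestq the best root Q-value found by MCTS; see
# mcts_tests.test_dkt_chunk for the exact definitions.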
In [48]:
# test out forward search
starttime = time.time()
# first load mem arrays
mem_arrays = np.load(mem_chkpt)['mem_arrays']
sim = RnnStudent2SimExact(concept_tree)
sim2 = sim.copy()
dkt = dmc.RnnStudentSimMemEnsemble(n_concepts, [mem_arrays])
#fsearch.dkt_forwardsearch_single(n_concepts, dkt, sim, 6)
fsearch.dkt_forwardsearch_single(n_concepts, sim2, sim, 6)
endtime = time.time()
six.print_('Time Elapsed {}'.format(endtime - starttime))
In [ ]: