In [35]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split

import drqn
import student as st

import data_generator as dg
import concept_dependency_graph as cdg
from experience_buffer import ExperienceBuffer
import dataset_utils as d_utils
import utils
import models_dict_utils

%load_ext autoreload
%autoreload 2
%reload_ext autoreload


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Preprocessing Data for DRQN

We take the data from the data generator and save it as traces of (s, a, r, sp) tuples.

Each trajectory corresponds to one trace.

If a trajectory has length n, the corresponding trace has length n-1, since each tuple needs the next state sp.

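For intuition, here is a minimal sketch of this conversion (a hypothetical helper, not the actual d_utils.preprocess_data_for_dqn implementation):

def trajectory_to_trace(states, actions, rewards):
    # A length-n trajectory yields n-1 (s, a, r, sp) tuples,
    # since each tuple pairs a state with its successor state.
    return [[states[i], actions[i], rewards[i], states[i + 1]]
            for i in range(len(states) - 1)]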

In [16]:
data = d_utils.load_data(filename="../synthetic_data/test-n10000-l3-random.pickle")
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")

In [4]:
# Single trace
print(dqn_data[0])


[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]), array([ 1.,  0.,  0.,  0.,  0.]), 0.20000000000000001, array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])], [array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  1.]), 0.20000000000000001, array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]
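This dataset was generated with sequence length 3 (the l3 in the filename), so each trace contains 3 - 1 = 2 tuples, as seen above.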

In [5]:
# First tuple in a trace
s, a, r, sp = dqn_data[0][0]
print(s)
print(a)
print(r)
print(sp)


[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
[ 1.  0.  0.  0.  0.]
0.2
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

In [6]:
# Last tuple in the trace
s, a, r, sp = dqn_data[0][-1]
print(s)
print(a)
print(r)
print(sp)


[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.]
0.2
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]

In [17]:
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)

Creating a DRQN model and training it


In [12]:
model_id = "test_model_drqn"

In [13]:
# Create the model object. timesteps=2 matches the trace length:
# the l3 dataset yields traces of length 3 - 1 = 2.
model = drqn.DRQNModel(model_id, timesteps=2)

In [14]:
# Initialize trainer object inside the model
model.init_trainer()

In [18]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)

val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)

In [23]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=2,
              run_id=run_id, load_checkpoint=True)


Training Step: 39249  | total loss: 0.08859 | time: 2.143s
| Optimizer | epoch: 007 | loss: 0.08859 -- iter: 7936/8000
Training Step: 39250  | total loss: 0.08909 | time: 3.175s
| Optimizer | epoch: 007 | loss: 0.08909 | val_loss: 0.08739 -- iter: 8000/8000
--
WARNING:tensorflow:Error encountered when serializing layer_tensor/lstm_2.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
WARNING:tensorflow:Error encountered when serializing layer_tensor/lstm_1.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
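The step and epoch counters in the log are larger than n_epoch=2, presumably because load_checkpoint=True resumes training from an earlier run's checkpoint.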

In [24]:
# init evaluator of the model
model.init_evaluator()

In [25]:
# Create inputs (states / observations so far) to use for predictions
from drqn import stack_batch
train_batch = train_buffer.sample_in_order(4)

# Batches span multiple timesteps and should have shape (batch_sz, n_timesteps, state_dim).
s_batch_train = stack_batch(train_batch[:, :, 0])  # current states
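As a sanity check, with 4 sampled traces, 2 timesteps, and the 10-dimensional state vectors shown earlier, the stacked batch should come out as:

print(s_batch_train.shape)  # expected: (4, 2, 10)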

In [26]:
# Use model to predict next action
actions, q_vals = model.predict(s_batch_train, last_timestep_only=True)

In [27]:
q_vals


Out[27]:
array([[ 0.30861092,  0.30838254,  0.30854946,  0.30801374,  0.30847904],
       [ 0.31383398,  0.31383744,  0.31381938,  0.31383258,  0.31286329],
       [ 0.44942483,  0.44977263,  0.44959086,  0.4496491 ,  0.44962877],
       [ 0.32500198,  0.32455364,  0.32502168,  0.32404408,  0.32474533]])

In [28]:
actions


Out[28]:
array([0, 1, 1, 2])
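Consistent with greedy action selection, the returned actions match the per-row argmax of the Q-values:

np.argmax(q_vals, axis=1)  # -> array([0, 1, 1, 2])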

In [29]:
# To predict on data with a different number of timesteps than we trained on,
# create a new model that reuses the same checkpoint.

eval_model = drqn.DRQNModel(model_id, timesteps=10)

In [30]:
eval_model.init_evaluator()
# Now the internal RNN is unrolled over 10 timesteps.
# Inputs with fewer than 10 timesteps can still be passed in; the remaining timesteps are padded.

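The 2-timestep batch from above can thus still be fed to this 10-timestep evaluator. A minimal sketch of such padding, assuming zero states appended along the time axis (the actual padding scheme is internal to drqn):

import numpy as np

def pad_timesteps(batch, n_timesteps):
    # Pad a (batch_sz, t, state_dim) array with zero states up to n_timesteps.
    pad = n_timesteps - batch.shape[1]
    return np.pad(batch, ((0, 0), (0, pad), (0, 0)), mode='constant')

padded = pad_timesteps(s_batch_train, 10)  # shape (4, 10, 10)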
In [31]:
eval_model.predict(s_batch_train, last_timestep_only=True)


Out[31]:
(array([0, 1, 1, 2]),
 array([[ 0.30861092,  0.30838254,  0.30854946,  0.30801374,  0.30847904],
        [ 0.31383398,  0.31383744,  0.31381938,  0.31383258,  0.31286329],
        [ 0.44942483,  0.44977263,  0.44959086,  0.4496491 ,  0.44962877],
        [ 0.32500198,  0.32455364,  0.32502168,  0.32404408,  0.32474533]]))

Testing the model


In [11]:
from drqn_tests import *

In [3]:
n_trajectories = 10
n_concepts = 5
horizon = 6
model_id = "test_model_drqn"
from simple_mdp import create_custom_dependency
dgraph = create_custom_dependency()

In [4]:
test_model = drqn.DRQNModel(model_id=model_id, timesteps=horizon)
test_model.init_evaluator()

In [5]:
learn_prob = 0.15
student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)

In [6]:
k = test_drqn_single(dgraph, student, horizon, test_model, DEBUG=True)


ERROR [ 1.  0.  0.  0.  0.] executed non-optimal action 0
ERROR [ 1.  0.  0.  0.  0.] executed non-optimal action 0

In [7]:
k


Out[7]:
array([ 1.,  0.,  0.,  0.,  0.])
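k is the student's final knowledge vector: only concept 0 was mastered in this run. If the post-test score is the fraction of concepts mastered, this corresponds to a score of k.mean() = 0.2.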

In [9]:
test_drqn_chunk(n_trajectories, dgraph, student, model_id, horizon)


traj i 0
traj i 1
traj i 2
traj i 3
traj i 4
traj i 5
traj i 6
traj i 7
traj i 8
traj i 9
Out[9]:
0.29999999999999999
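The return value (0.3) is presumably the average post-test score over the 10 test trajectories.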

Final Test Function:


In [10]:
test_drqn(model_id=model_id)


Testing model: test_model_drqn
horizon: 6
traj i 0
traj i 1
traj i 2
traj i 3
traj i 4
traj i 5
traj i 6
traj i 7
traj i 8
traj i 9
Generating data for 1000 students with behavior policy expert and sequence length 6.
Average posttest true: 0.385
Average posttest drqn: 0.22
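On this run the learned DRQN policy (0.22) still falls well short of the expert policy used to generate the evaluation baseline (0.385).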

General Workflow

1. Create Data Set


In [34]:
n_concepts = 4
use_student2 = True
student2_str = '2' if use_student2 else ''
learn_prob = 0.15
lp_str = '-lp{}'.format(int(learn_prob*100)) if not use_student2 else ''
n_students = 100000
seqlen = 7
filter_mastery = False
filter_str = '' if not filter_mastery else '-filtered'
policy = 'random'
filename = 'test{}-n{}-l{}{}-{}{}.pickle'.format(student2_str, n_students, seqlen,
                                                    lp_str, policy, filter_str)
#concept_tree = sm.create_custom_dependency()
concept_tree = cdg.ConceptDependencyGraph()
concept_tree.init_default_tree(n_concepts)
if not use_student2:
    test_student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
else:
    test_student = st.Student2(n_concepts)
print(filename)


test2-n100000-l7-random.pickle
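The name encodes the configuration above: Student2 dynamics, 100000 students, sequence length 7, random behavior policy, no mastery filtering.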

In [36]:
print ("Initializing synthetic data sets...")
dg.generate_data(concept_tree, student=test_student, n_students=n_students, filter_mastery=filter_mastery, seqlen=seqlen, policy=policy, filename="{}{}".format(dg.SYN_DATA_DIR, filename))
print ("Data generation completed. ")


Initializing synthetic data sets...
Generating data for 100000 students with behavior policy random and sequence length 7.
Data generation completed. 

In [38]:
data = d_utils.load_data(filename="../synthetic_data/{}".format(filename))
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)

In [ ]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)

val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)

2. Create Model and Train


In [48]:
model_id = "test2_model_drqn_mid"
# Traces have length seqlen - 1, so the RNN is unrolled over seqlen - 1 timesteps.
model = drqn.DRQNModel(model_id, timesteps=seqlen-1)
model.init_trainer()


Loaded model test2_model_drqn_mid

In [44]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=32,
              run_id=run_id, load_checkpoint=True)


Training Step: 6863  | total loss: 0.03597 | time: 15.444s
| Optimizer | epoch: 006 | loss: 0.03597 -- iter: 39232/80000
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-44-7d5db90633ac> in <module>()
      3 run_id = "{}".format(date_time_string)
      4 model.train(train_buffer, val_buffer, n_epoch=32,
----> 5               run_id=run_id, load_checkpoint=True)

/Users/lisa1010/dev/smart-tutor/code/drqn.pyc in train(self, train_buffer, val_buffer, n_epoch, run_id, load_checkpoint, load_ckpt_path)
    156         self.trainer.fit({q_inputs: s_batch_train, a: a_batch_train, r: r_batch_train, target_inputs: sp_batch_train},
    157                     val_feed_dicts={q_inputs: s_batch_val, a: a_batch_val, r: r_batch_val, target_inputs: sp_batch_val},
--> 158                     n_epoch=n_epoch, snapshot_epoch=True, run_id=run_id)
    159 
    160 

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tflearn/helpers/trainer.pyc in fit(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id, callbacks)
    331                                                        (bool(self.best_checkpoint_path) | snapshot_epoch),
    332                                                        snapshot_step,
--> 333                                                        show_metric)
    334 
    335                             # Update training state

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tflearn/helpers/trainer.pyc in _train(self, training_step, snapshot_epoch, snapshot_step, show_metric)
    772         tflearn.is_training(True, session=self.session)
    773         _, train_summ_str = self.session.run([self.train, self.summ_op],
--> 774                                              feed_batch)
    775 
    776         # Retrieve loss value from summary string

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    963     if final_fetches or final_targets:
    964       results = self._do_run(handle, final_targets, final_fetches,
--> 965                              feed_dict_string, options, run_metadata)
    966     else:
    967       results = []

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1013     if handle is None:
   1014       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015                            target_list, options, run_metadata)
   1016     else:
   1017       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1020   def _do_call(self, fn, *args):
   1021     try:
-> 1022       return fn(*args)
   1023     except errors.OpError as e:
   1024       message = compat.as_text(e.message)

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002         return tf_session.TF_Run(session, options,
   1003                                  feed_dict, fetch_list, target_list,
-> 1004                                  status, run_metadata)
   1005 
   1006     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
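Training was interrupted manually here. Since the trainer snapshots at every epoch (snapshot_epoch=True in the drqn.train call shown in the traceback), the most recently saved checkpoint is what the test below loads.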

3. Test Model in the "real world" and calculate post-test scores


In [ ]:
test_drqn(model_id=model_id)


Testing model: test2_model_drqn_mid
horizon: 6
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
