In [35]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split

import drqn
import student as st

import data_generator as dg
import concept_dependency_graph as cdg
from experience_buffer import ExperienceBuffer
import dataset_utils as d_utils
import utils
import models_dict_utils

%load_ext autoreload
%autoreload 2
%reload_ext autoreload


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Preprocessing Data for DRQN

We take the data from the data generator and save it as traces of (s, a, r, sp) tuples.

Each trajectory corresponds to one trace.

If a trajectory has length n, the corresponding trace has length n-1, since each tuple needs the next state sp.

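For intuition, here is a minimal sketch of this conversion (a hypothetical helper, not the actual d_utils.preprocess_data_for_dqn implementation):

def trajectory_to_trace(states, actions, rewards):
    # A length-n trajectory yields n-1 (s, a, r, sp) tuples,
    # since each tuple pairs a state with its successor state.
    return [[states[i], actions[i], rewards[i], states[i + 1]]
            for i in range(len(states) - 1)]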

In [16]:
data = d_utils.load_data(filename="../synthetic_data/test-n10000-l3-random.pickle")
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")

In [4]:
# Single trace
print(dqn_data[0])


[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]), array([ 1.,  0.,  0.,  0.,  0.]), 0.20000000000000001, array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])], [array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  1.]), 0.20000000000000001, array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]
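This dataset was generated with sequence length 3 (the l3 in the filename), so each trace contains 3 - 1 = 2 tuples, as seen above.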

In [5]:
# First tuple in a trace
s, a, r, sp = dqn_data[0][0]
print(s)
print(a)
print(r)
print(sp)


[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
[ 1.  0.  0.  0.  0.]
0.2
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

In [6]:
# Last tuple in the trace
s, a, r, sp = dqn_data[0][-1]
print(s)
print(a)
print(r)
print(sp)


[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.]
0.2
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]

In [17]:
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)

Creating a DRQN model and training it


In [12]:
model_id = "test_model_drqn"

In [13]:
# Create the model object. timesteps=2 matches the trace length:
# the l3 dataset yields traces of length 3 - 1 = 2.
model = drqn.DRQNModel(model_id, timesteps=2)

In [14]:
# Initialize trainer object inside the model
model.init_trainer()

In [18]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)

val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)

In [23]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=2,
              run_id=run_id, load_checkpoint=True)


Training Step: 39249  | total loss: 0.08859 | time: 2.143s
| Optimizer | epoch: 007 | loss: 0.08859 -- iter: 7936/8000
Training Step: 39250  | total loss: 0.08909 | time: 3.175s
| Optimizer | epoch: 007 | loss: 0.08909 | val_loss: 0.08739 -- iter: 8000/8000
--
WARNING:tensorflow:Error encountered when serializing layer_tensor/lstm_2.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
WARNING:tensorflow:Error encountered when serializing layer_tensor/lstm_1.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
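The step and epoch counters in the log are larger than n_epoch=2, presumably because load_checkpoint=True resumes training from an earlier run's checkpoint.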

In [24]:
# init evaluator of the model
model.init_evaluator()

In [25]:
# Create inputs (states / observations so far) to use for predictions
from drqn import stack_batch
train_batch = train_buffer.sample_in_order(4)

# Batches span multiple timesteps and should have shape (batch_sz, n_timesteps, state_dim).
s_batch_train = stack_batch(train_batch[:, :, 0])  # current states
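As a sanity check, with 4 sampled traces, 2 timesteps, and the 10-dimensional state vectors shown earlier, the stacked batch should come out as:

print(s_batch_train.shape)  # expected: (4, 2, 10)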

In [26]:
# Use model to predict next action
actions, q_vals = model.predict(s_batch_train, last_timestep_only=True)

In [27]:
q_vals


Out[27]:
array([[ 0.30861092,  0.30838254,  0.30854946,  0.30801374,  0.30847904],
       [ 0.31383398,  0.31383744,  0.31381938,  0.31383258,  0.31286329],
       [ 0.44942483,  0.44977263,  0.44959086,  0.4496491 ,  0.44962877],
       [ 0.32500198,  0.32455364,  0.32502168,  0.32404408,  0.32474533]])

In [28]:
actions


Out[28]:
array([0, 1, 1, 2])
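Consistent with greedy action selection, the returned actions match the per-row argmax of the Q-values:

np.argmax(q_vals, axis=1)  # -> array([0, 1, 1, 2])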

In [29]:
# To predict on data with a different number of timesteps than we trained on,
# create a new model that reuses the same checkpoint.

eval_model = drqn.DRQNModel(model_id, timesteps=10)

In [30]:
eval_model.init_evaluator()
# Now the internal RNN is unrolled over 10 timesteps.
# Inputs with fewer than 10 timesteps can still be passed in; the remaining timesteps are padded.

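The 2-timestep batch from above can thus still be fed to this 10-timestep evaluator. A minimal sketch of such padding, assuming zero states appended along the time axis (the actual padding scheme is internal to drqn):

import numpy as np

def pad_timesteps(batch, n_timesteps):
    # Pad a (batch_sz, t, state_dim) array with zero states up to n_timesteps.
    pad = n_timesteps - batch.shape[1]
    return np.pad(batch, ((0, 0), (0, pad), (0, 0)), mode='constant')

padded = pad_timesteps(s_batch_train, 10)  # shape (4, 10, 10)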
In [31]:
eval_model.predict(s_batch_train, last_timestep_only=True)


Out[31]:
(array([0, 1, 1, 2]),
 array([[ 0.30861092,  0.30838254,  0.30854946,  0.30801374,  0.30847904],
        [ 0.31383398,  0.31383744,  0.31381938,  0.31383258,  0.31286329],
        [ 0.44942483,  0.44977263,  0.44959086,  0.4496491 ,  0.44962877],
        [ 0.32500198,  0.32455364,  0.32502168,  0.32404408,  0.32474533]]))

Testing the model


In [11]:
from drqn_tests import *

In [3]:
n_trajectories = 10
n_concepts = 5
horizon = 6
model_id = "test_model_drqn"
from simple_mdp import create_custom_dependency
dgraph = create_custom_dependency()

In [4]:
test_model = drqn.DRQNModel(model_id=model_id, timesteps=horizon)
test_model.init_evaluator()

In [5]:
learn_prob = 0.15
student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)

In [6]:
k = test_drqn_single(dgraph, student, horizon, test_model, DEBUG=True)


ERROR [ 1.  0.  0.  0.  0.] executed non-optimal action 0
ERROR [ 1.  0.  0.  0.  0.] executed non-optimal action 0

In [7]:
k


Out[7]:
array([ 1.,  0.,  0.,  0.,  0.])
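k is the student's final knowledge vector: only concept 0 was mastered in this run. If the post-test score is the fraction of concepts mastered, this corresponds to a score of k.mean() = 0.2.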

In [9]:
test_drqn_chunk(n_trajectories, dgraph, student, model_id, horizon)


traj i 0
traj i 1
traj i 2
traj i 3
traj i 4
traj i 5
traj i 6
traj i 7
traj i 8
traj i 9
Out[9]:
0.29999999999999999
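The return value (0.3) is presumably the average post-test score over the 10 test trajectories.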

Final Test Function:


In [10]:
test_drqn(model_id=model_id)


Testing model: test_model_drqn
horizon: 6
traj i 0
traj i 1
traj i 2
traj i 3
traj i 4
traj i 5
traj i 6
traj i 7
traj i 8
traj i 9
Generating data for 1000 students with behavior policy expert and sequence length 6.
Average posttest true: 0.385
Average posttest drqn: 0.22
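On this run the learned DRQN policy (0.22) still falls well short of the expert policy used to generate the evaluation baseline (0.385).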

General Workflow

1. Create Data Set


In [34]:
n_concepts = 4
use_student2 = True
student2_str = '2' if use_student2 else ''
learn_prob = 0.15
lp_str = '-lp{}'.format(int(learn_prob*100)) if not use_student2 else ''
n_students = 100000
seqlen = 7
filter_mastery = False
filter_str = '' if not filter_mastery else '-filtered'
policy = 'random'
filename = 'test{}-n{}-l{}{}-{}{}.pickle'.format(student2_str, n_students, seqlen,
                                                    lp_str, policy, filter_str)
#concept_tree = sm.create_custom_dependency()
concept_tree = cdg.ConceptDependencyGraph()
concept_tree.init_default_tree(n_concepts)
if not use_student2:
    test_student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
else:
    test_student = st.Student2(n_concepts)
print(filename)


test2-n100000-l7-random.pickle
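The name encodes the configuration above: Student2 dynamics, 100000 students, sequence length 7, random behavior policy, no mastery filtering.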

In [36]:
print ("Initializing synthetic data sets...")
dg.generate_data(concept_tree, student=test_student, n_students=n_students, filter_mastery=filter_mastery, seqlen=seqlen, policy=policy, filename="{}{}".format(dg.SYN_DATA_DIR, filename))
print ("Data generation completed. ")


Initializing synthetic data sets...
Generating data for 100000 students with behavior policy random and sequence length 7.
Data generation completed. 

In [38]:
data = d_utils.load_data(filename="../synthetic_data/{}".format(filename))
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)

In [ ]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)

val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)

2. Create Model and Train


In [48]:
model_id = "test2_model_drqn_mid"
# Traces have length seqlen - 1, so the RNN is unrolled over seqlen - 1 timesteps.
model = drqn.DRQNModel(model_id, timesteps=seqlen-1)
model.init_trainer()


Loaded model test2_model_drqn_mid

In [44]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=32,
              run_id=run_id, load_checkpoint=True)


Training Step: 6863  | total loss: 0.03597 | time: 15.444s
| Optimizer | epoch: 006 | loss: 0.03597 -- iter: 39232/80000
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-44-7d5db90633ac> in <module>()
      3 run_id = "{}".format(date_time_string)
      4 model.train(train_buffer, val_buffer, n_epoch=32,
----> 5               run_id=run_id, load_checkpoint=True)

/Users/lisa1010/dev/smart-tutor/code/drqn.pyc in train(self, train_buffer, val_buffer, n_epoch, run_id, load_checkpoint, load_ckpt_path)
    156         self.trainer.fit({q_inputs: s_batch_train, a: a_batch_train, r: r_batch_train, target_inputs: sp_batch_train},
    157                     val_feed_dicts={q_inputs: s_batch_val, a: a_batch_val, r: r_batch_val, target_inputs: sp_batch_val},
--> 158                     n_epoch=n_epoch, snapshot_epoch=True, run_id=run_id)
    159 
    160 

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tflearn/helpers/trainer.pyc in fit(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id, callbacks)
    331                                                        (bool(self.best_checkpoint_path) | snapshot_epoch),
    332                                                        snapshot_step,
--> 333                                                        show_metric)
    334 
    335                             # Update training state

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tflearn/helpers/trainer.pyc in _train(self, training_step, snapshot_epoch, snapshot_step, show_metric)
    772         tflearn.is_training(True, session=self.session)
    773         _, train_summ_str = self.session.run([self.train, self.summ_op],
--> 774                                              feed_batch)
    775 
    776         # Retrieve loss value from summary string

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    963     if final_fetches or final_targets:
    964       results = self._do_run(handle, final_targets, final_fetches,
--> 965                              feed_dict_string, options, run_metadata)
    966     else:
    967       results = []

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1013     if handle is None:
   1014       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015                            target_list, options, run_metadata)
   1016     else:
   1017       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1020   def _do_call(self, fn, *args):
   1021     try:
-> 1022       return fn(*args)
   1023     except errors.OpError as e:
   1024       message = compat.as_text(e.message)

/Users/lisa1010/tf_venv/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002         return tf_session.TF_Run(session, options,
   1003                                  feed_dict, fetch_list, target_list,
-> 1004                                  status, run_metadata)
   1005 
   1006     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
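Training was interrupted manually here. Since the trainer snapshots at every epoch (snapshot_epoch=True in the drqn.train call shown in the traceback), the most recently saved checkpoint is what the test below loads.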

3. Test Model in the "real world" and calculate post-test scores


In [ ]:
test_drqn(model_id=model_id)


Testing model: test2_model_drqn_mid
horizon: 6
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
Loaded model test2_model_drqn_mid
