In [35]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split
import drqn
import student as st
import data_generator as dg
import concept_dependency_graph as cdg
from experience_buffer import ExperienceBuffer
import dataset_utils as d_utils
import utils
import models_dict_utils
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
In [16]:
data = d_utils.load_data(filename="../synthetic_data/test-n10000-l3-random.pickle")
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")
In [4]:
# Single Trace
print(dqn_data[0])
In [5]:
# First tuple in a trace
s, a, r, sp = dqn_data[0][0]
print(s)
print(a)
print(r)
print(sp)
In [6]:
# Last tuple in a trace
s, a, r, sp = dqn_data[0][-1]
print(s)
print(a)
print(r)
print(sp)
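In [ ]:
# Quick sanity check over one full trace (a sketch; it assumes each trace is a
# list of (s, a, r, s') tuples as printed above, and that the s' of one tuple
# matches the s of the next tuple -- an assumption about how the data was built).
trace = dqn_data[0]
total_reward = 0.0
for i, (s, a, r, sp) in enumerate(trace):
    total_reward += r
    if i + 1 < len(trace):
        assert np.array_equal(sp, trace[i + 1][0]), "s' should match the next tuple's state"
print("trace length: {}, total reward: {}".format(len(trace), total_reward))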
In [17]:
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)
In [12]:
model_id = "test_model_drqn"
In [13]:
# Create the model object
model = drqn.DRQNModel(model_id, timesteps=2)
In [14]:
# Initialize trainer object inside the model
model.init_trainer()
In [18]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)
val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)
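In [ ]:
# The buffer setup above is repeated later for a second dataset; a small helper
# like this (hypothetical, not part of experience_buffer) keeps it in one place.
# It only mirrors the manual assignments used above.
def make_buffer(transitions):
    buf = ExperienceBuffer()
    buf.buffer = transitions
    buf.buffer_sz = len(buf.buffer)
    return buf

# e.g. train_buffer, val_buffer = make_buffer(dqn_data_train), make_buffer(dqn_data_test)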
In [23]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=2,
            run_id=run_id, load_checkpoint=True)
In [24]:
# init evaluator of the model
model.init_evaluator()
In [25]:
# Create inputs (states / observations so far) to use for predictions
from drqn import stack_batch
train_batch = train_buffer.sample_in_order(4)
# Make sure that batches span multiple timesteps; they should have shape (batch_sz, n_timesteps, ?)
s_batch_train = stack_batch(train_batch[:, :, 0])  # current states
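In [ ]:
# Sanity check on the stacked batch (a sketch; it assumes stack_batch returns a
# NumPy array and that sample_in_order(4) yielded 4 traces, so only the first
# two axes are checked and the trailing observation dimension is left open).
print(s_batch_train.shape)
assert s_batch_train.ndim == 3, "expected shape (batch_sz, n_timesteps, obs_dim)"
assert s_batch_train.shape[0] == 4, "batch size should match sample_in_order(4)"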
In [26]:
# Use model to predict next action
actions, q_vals = model.predict(s_batch_train, last_timestep_only=True)
In [27]:
q_vals
Out[27]:
In [28]:
actions
Out[28]:
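In [ ]:
# Relationship between the two outputs (an assumption about the return values:
# q_vals is taken to hold one Q-value per action for each sample, and actions
# to be the argmax over that axis; verify against the Out[] cells above).
print(np.argmax(q_vals, axis=-1))
print(actions)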
In [ ]:
In [29]:
# If we want to predict on data with a different number of timesteps than we trained on,
# create a new model that reuses the same checkpoint.
eval_model = drqn.DRQNModel(model_id, timesteps=10)
In [30]:
eval_model.init_evaluator()
# Now the internal RNN will be unrolled over 10 timesteps.
# You can still pass in inputs with fewer than 10 timesteps, in which case the remaining timesteps will be padded.
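In [ ]:
# If you prefer to pad manually instead of relying on the evaluator's padding
# (a sketch; zero padding along the time axis is an assumption, and it assumes
# the input has fewer than 10 timesteps), you can zero-pad up to the horizon:
n_timesteps = 10
pad_len = n_timesteps - s_batch_train.shape[1]
s_batch_padded = np.pad(s_batch_train,
                        ((0, 0), (0, pad_len), (0, 0)),
                        mode='constant')
print(s_batch_padded.shape)  # (batch_sz, 10, obs_dim)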
In [31]:
eval_model.predict(s_batch_train, last_timestep_only=True)
Out[31]:
In [ ]:
In [11]:
from drqn_tests import *
In [3]:
n_trajectories = 10
n_concepts = 5
horizon = 6
model_id = "test_model_drqn"
from simple_mdp import create_custom_dependency
dgraph = create_custom_dependency()
In [4]:
test_model = drqn.DRQNModel(model_id=model_id, timesteps=horizon)
test_model.init_evaluator()
In [5]:
learn_prob = 0.15
student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
In [6]:
k = test_drqn_single(dgraph, student, horizon, test_model, DEBUG=True)
In [7]:
k
Out[7]:
In [9]:
test_drqn_chunk(n_trajectories, dgraph, student, model_id, horizon)
Out[9]:
In [10]:
test_drqn(model_id=model_id)
In [34]:
n_concepts = 4
use_student2 = True
student2_str = '2' if use_student2 else ''
learn_prob = 0.15
lp_str = '-lp{}'.format(int(learn_prob*100)) if not use_student2 else ''
n_students = 100000
seqlen = 7
filter_mastery = False
filter_str = '' if not filter_mastery else '-filtered'
policy = 'random'
filename = 'test{}-n{}-l{}{}-{}{}.pickle'.format(student2_str, n_students, seqlen,
                                                 lp_str, policy, filter_str)
#concept_tree = sm.create_custom_dependency()
concept_tree = cdg.ConceptDependencyGraph()
concept_tree.init_default_tree(n_concepts)
if not use_student2:
    test_student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
else:
    test_student = st.Student2(n_concepts)
print(filename)
In [36]:
print ("Initializing synthetic data sets...")
dg.generate_data(concept_tree, student=test_student, n_students=n_students, filter_mastery=filter_mastery, seqlen=seqlen, policy=policy, filename="{}{}".format(dg.SYN_DATA_DIR, filename))
print ("Data generation completed. ")
In [38]:
data = d_utils.load_data(filename="../synthetic_data/{}".format(filename))
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="dense")
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)
In [ ]:
# Creating training and validation data
train_buffer = ExperienceBuffer()
train_buffer.buffer = dqn_data_train
train_buffer.buffer_sz = len(train_buffer.buffer)
val_buffer = ExperienceBuffer()
val_buffer.buffer = dqn_data_test
val_buffer.buffer_sz = len(val_buffer.buffer)
In [48]:
model_id = "test2_model_drqn_mid"
model = drqn.DRQNModel(model_id, timesteps=seqlen-1)
model.init_trainer()
In [44]:
# train the model (uses the previously initialized trainer object)
date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
run_id = "{}".format(date_time_string)
model.train(train_buffer, val_buffer, n_epoch=32,
            run_id=run_id, load_checkpoint=True)
In [ ]:
test_drqn(model_id=model_id)
In [ ]: