In [1]:
# August 25 2017
# This file is for experiments with student2 4 skills and training trajectory length 5
# to check different architectures and whether they can learn good models
# This uses student2a where transitions happen after observations
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import scipy as sp
import six
import pickle
from matplotlib.pyplot import *
def ixs2ranks(ixs):
    """Invert a sort order.

    Given `ixs` where ixs[r] is the index of the item with rank r (e.g. the
    output of np.argsort), return `ranks` where ranks[i] is the rank of item i.

    Fix: `np.int` was deprecated and removed in numpy >= 1.24; use the
    builtin `int` dtype instead.
    """
    ranks = np.zeros(ixs.shape, dtype=int)
    for r, i in enumerate(ixs):
        ranks[i] = r
    return ranks
def extract_ensemble_data(outdata):
    """Average the forward-search results over trials for each ensemble size.

    outdata is a list of "splits" (one per ensemble size), each a list of
    trials; trial[0][1] holds the per-trial stats where index 1 is the
    qvalue and index 3 is the posttest score — assumed per the script that
    produced these pickles, TODO confirm.

    Returns (sq, sim_sq): per-split mean qvalue and mean posttest score.

    Cleanup: removed the unused `split_num` local and commented-out
    debug prints.
    """
    sq = np.mean([[trial[0][1][1] for trial in split] for split in outdata], axis=1)
    sim_sq = np.mean([[trial[0][1][3] for trial in split] for split in outdata], axis=1)
    return sq, sim_sq
def extract_ensemble_last_step_error(outdata):
    """Mean/std of the last-step multistep error per ensemble size.

    For each split (ensemble size) in outdata, collect trial[0][1][0][-1]
    (last-step error under the random policy) and trial[0][1][1][-1]
    (last-step error under the expert policy) across trials.

    Returns (random_mean, random_std, expert_mean, expert_std), one entry
    per split.

    Cleanup: each nested comprehension was previously built twice (once for
    np.mean, once for np.std); build each array once. Also removed the
    unused `split_num` local and commented-out debug prints.
    """
    random_last = np.array([[trial[0][1][0][-1] for trial in split] for split in outdata])
    expert_last = np.array([[trial[0][1][1][-1] for trial in split] for split in outdata])
    return (random_last.mean(axis=1), random_last.std(axis=1),
            expert_last.mean(axis=1), expert_last.std(axis=1))
def extract_last_step_error(multistep):
    """Pull each model's final-step multistep error for both policies.

    model_output[0][0] is the per-step error sequence under the random
    policy, model_output[0][1] under the expert policy; we keep only the
    last entry of each.

    Returns (random_errors, expert_errors) as 1-D arrays, one value per model.
    """
    random_last = []
    expert_last = []
    for model_output in multistep:
        random_last.append(model_output[0][0][-1])
        expert_last.append(model_output[0][1][-1])
    return np.array(random_last), np.array(expert_last)
def average_error_per_step(multistep):
    """Average the per-step multistep errors over all models.

    Returns (random_errors, expert_errors): per-step means of
    model_output[0][0] (random policy) and model_output[0][1] (expert
    policy) across models.

    Cleanup: removed the unused `num_models` local.
    """
    random_errors = np.mean([model_output[0][0] for model_output in multistep], axis=0)
    expert_errors = np.mean([model_output[0][1] for model_output in multistep], axis=0)
    return random_errors, expert_errors
def graph_average_ms(ms):
    """Plot the multistep error per step, averaged over all models, for the
    random and expert policies, and print the last-step values."""
    avg_random, avg_expert = average_error_per_step(ms)
    figure()
    title('Average Multistep Error Per Step')
    xlabel('Step')
    ylabel('MSE')
    steps = np.arange(avg_random.shape[0])
    plot(steps, avg_random, label='random')
    plot(steps, avg_expert, label='expert')
    legend()
    six.print_('Last step random {}'.format(avg_random[-1]))
    six.print_('Last step expert {}'.format(avg_expert[-1]))
def graph_trainauc_score(vloss, scores):
    """Scatter each model's training-loss AUC (sum over epochs) against its
    posttest score."""
    figure()
    title('Training Loss AUC versus Actual Performance')
    xlabel('Training Loss AUC')
    ylabel('Posttest Score')
    auc = np.sum(vloss, axis=1)
    plot(auc, scores, '.')
    ylim(0, 1)
def graph_trainauc_qval(vloss, qval):
    """Scatter each model's training-loss AUC (sum over epochs) against its
    own qvalue estimate."""
    figure()
    title('Training Loss AUC versus Own Qvalue')
    xlabel('Training Loss AUC')
    ylabel('Qvalue')
    auc = np.sum(vloss, axis=1)
    plot(auc, qval, '.')
def graph_qval_score(qval, scores):
    """Scatter each model's own qvalue estimate against its actual posttest
    score."""
    figure()
    title('Own Qval versus Actual Performance')
    xlabel('Qvalue')
    ylabel('Posttest Score')
    plot(qval, scores, '.')
def graph_ms_score(ms, scores, policy):
    """Scatter last-step multistep error against posttest score for one
    policy ('Random' or 'Expert' — used only in the title)."""
    figure()
    title('Last Step Multistep Error versus Actual Performance for {} Policy'.format(policy))
    xlabel('Last Step Multistep MSE')
    ylabel('Posttest Score')
    plot(ms, scores, '.')
def graph_ensemble(fsen_sq, fsen_sim_sq):
    """Plot posttest score and own qvalue as a function of ensemble size.

    Ensemble sizes are taken to be 5, 10, 15, ... (index*5 + 5) — assumed
    from the construction below; TODO confirm against the script that
    produced the ensemble data.

    Cleanup: removed a pointless `if True:` wrapper around the second
    figure, and the size axis is now computed once instead of twice.
    """
    sizes = np.arange(len(fsen_sq)) * 5 + 5
    figure()
    title('Ensemble Size versus Actual Performance')
    xlabel('# of models in ensemble')
    ylabel('Posttest Score')
    plot(sizes, fsen_sim_sq, '.')
    ylim(-0.1, 1.1)
    figure()
    title('Ensemble Size versus Own Qvalue')
    xlabel('# of models in ensemble')
    ylabel('Qvalue')
    plot(sizes, fsen_sq, '.')
    ylim(-0.1, 1.1)
def graph_ensemble_ms(outdata):
    """Plot the last-step multistep error (mean with +/- one std bands)
    versus ensemble size for both policies, and print the final values."""
    mean_rand, std_rand, mean_exp, std_exp = extract_ensemble_last_step_error(outdata)
    sizes = np.array(list(six.moves.range(len(mean_rand)))) * 5 + 5
    figure()
    title('Ensemble Size versus Last Step Multistep Error')
    xlabel('# of models in ensemble')
    ylabel('Last Step Multistep MSE')
    plot(sizes, mean_rand, label='random', color='#0099cc')
    plot(sizes, mean_rand + std_rand, color='#6699cc')
    plot(sizes, mean_rand - std_rand, color='#6699cc')
    plot(sizes, mean_exp, label='expert', color='#ff9900')
    plot(sizes, mean_exp + std_exp, color='#ffcc66')
    plot(sizes, mean_exp - std_exp, color='#ffcc66')
    legend()
    ylim(0, 0.5)
    six.print_('Ensemble last step random {}'.format(mean_rand[-1]))
    six.print_('Ensemble last step expert {}'.format(mean_exp[-1]))
def graph_info(path, runstr='runA'):
    """Load one experiment's training stats, forward-search results and
    multistep-error data, then draw the summary figures.

    Args:
        path: experiment directory containing stats-*.npz and the pickles.
        runstr: run identifier embedded in the filenames. Defaults to
            'runA' because several cells in this notebook call
            graph_info(path) with a single argument, which raised a
            TypeError under the old required-argument signature.
    """
    data11 = np.load(path + '/stats-{}.npz'.format(runstr))
    with open(path + '/fsearch-{}-horizon6.pickle'.format(runstr), 'rb') as f:
        fsdata = pickle.load(f)
    with open(path + '/fsearchensemble-{}-horizon6-n40.pickle'.format(runstr), 'rb') as f:
        fsensemble = pickle.load(f)
    with open(path + '/multistep-{}-horizon6-n10000.pickle'.format(runstr), 'rb') as f:
        multistep = pickle.load(f)
    # Per-model qvalue (index 1) and posttest score (index 3) from the
    # forward-search output.
    fs_sq = np.array([d[0][1] for d in fsdata])
    fs_sim_sq = np.array([d[0][3] for d in fsdata])
    print(fs_sim_sq)
    print(np.sum(fs_sim_sq))
    fsen_sq, fsen_sim_sq = extract_ensemble_data(fsensemble)
    # Loss curves per model; AUC = sum over epochs, "last" = mean of the
    # final two epochs.
    vloss = np.vstack((data11['vloss'],))
    vloss_auc = np.sum(vloss, axis=1)
    vloss_last = np.mean(vloss[:, -2:], axis=1)
    tloss = np.vstack((data11['tloss'],))
    tloss_auc = np.sum(tloss, axis=1)
    tloss_last = np.mean(tloss[:, -2:], axis=1)
    # Rankings by score / qvalue / validation-loss AUC.
    # NOTE(review): these (and num_models) are locals, yet later notebook
    # cells reference them by name — consider returning them.
    sorted_by_fs_sim_sq = np.flipud(np.argsort(fs_sim_sq))
    ranked_by_fs_sim_sq = ixs2ranks(sorted_by_fs_sim_sq)
    sorted_by_fs_sq = np.flipud(np.argsort(fs_sq))
    ranked_by_fs_sq = ixs2ranks(sorted_by_fs_sq)
    sorted_by_vloss = np.argsort(vloss_auc)
    ranked_by_vloss = ixs2ranks(sorted_by_vloss)
    num_models = vloss_auc.shape[0]
    graph_qval_score(fs_sq, fs_sim_sq)
    ylim(-0.1, 1.1)
    graph_ensemble(fsen_sq, fsen_sim_sq)
    # Multistep-error figures.
    ms_random, ms_expert = extract_last_step_error(multistep)
    graph_average_ms(multistep)
    graph_ms_score(ms_random, fs_sim_sq, 'Random')
    graph_ms_score(ms_expert, fs_sim_sq, 'Expert')
    # Multistep ensemble.
    # NOTE(review): this filename hardcodes 'runB' instead of using runstr
    # like every other file above — confirm whether that is intentional.
    with open(path + '/multistepensemble-ntrial40-runB-horizon6-ntraj40.pickle', 'rb') as f:
        msendata = pickle.load(f)
    graph_ensemble_ms(msendata)
In [7]:
# Per-epoch train/validation loss curves: mid GRU model, dropout only.
data11 = np.load('experiments/test2_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — if a standard error was
# intended, confirm; the *_std values are unused in this cell anyway.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.000005)
legend()
# looks like after 20 is enough. Use 25.
Out[7]:
In [8]:
# Per-epoch train/validation loss curves: mid GRU model, noise + dropout.
data11 = np.load('experiments/test2_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.00005)
legend()
# looks like around 50
Out[8]:
In [2]:
# Per-epoch train/validation loss curves: mid GRU model, dropout + output dropout.
data11 = np.load('experiments/test2_modelgrusimple_mid-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.00005)
legend()
# looks like 25 is fine
Out[2]:
In [3]:
# Per-epoch train/validation loss curves: mid GRU model, noise + dropout + output dropout.
data11 = np.load('experiments/test2_modelgrusimple_mid-noise0.05-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.00005)
legend()
# looks like 45 again is fine
Out[3]:
In [6]:
# Inspect a saved memory snapshot from epoch 40 of run A0.
data11 = np.load('experiments/test2_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle/mem-runA0-epoch40.npz')
# List the array names stored in the npz archive.
six.print_(data11.files)
mem_arrays = data11['mem_arrays']
# Spot-check a few entries. The meaning of the first index and of element
# [5][1000] is not documented here — presumably per-layer / per-sample
# memory states; verify against the training script that wrote this file.
six.print_(mem_arrays[0])
six.print_(mem_arrays[1])
six.print_(mem_arrays[5][1000])
In [5]:
# Per-epoch train/validation loss curves: large GRU model, dropout only.
data11 = np.load('experiments/test2_modelgrusimple_large-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.0002)
legend()
# looks like 15 epochs is enough
Out[5]:
In [6]:
# Per-epoch train/validation loss curves: large GRU model, noise + dropout.
data11 = np.load('experiments/test2_modelgrusimple_large-noise0.05-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.0002)
legend()
# looks like 25 epochs is enough
Out[6]:
In [8]:
# Per-epoch train/validation loss curves: large GRU model, dropout + output dropout.
data11 = np.load('experiments/test2_modelgrusimple_large-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.0002)
legend()
# looks like 15 is fine
Out[8]:
In [9]:
# Per-epoch train/validation loss curves: large GRU model, noise + dropout + output dropout.
data11 = np.load('experiments/test2_modelgrusimple_large-noise0.05-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle/stats-runA.npz')
tloss = data11['tloss']
vloss = data11['vloss']
# NOTE(review): std is divided by N, not sqrt(N) — confirm intent; values unused.
tloss_mean, tloss_std = np.mean(tloss, axis=0), np.std(tloss, axis=0) / tloss.shape[0]
vloss_mean, vloss_std = np.mean(vloss, axis=0), np.std(vloss, axis=0) / vloss.shape[0]
figure()
title('Training Losses')
for curve, lbl in ((tloss_mean, 'tloss'), (vloss_mean, 'vloss')):
    plot(curve, label=lbl)
ylim(0, 0.0002)
legend()
# looks like 25 is fine
Out[9]:
In [6]:
graph_info('experiments/test2_modelgrusimple_mid-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [ ]:
# NOTE(review): num_models, sorted_by_fs_sq, fs_sim_sq, ranked_by_*, and
# vloss_auc are locals of graph_info, not module globals — this cell raises
# NameError on a fresh kernel. Consider having graph_info return them.
for i in six.moves.range(num_models):
ix = sorted_by_fs_sq[i]
six.print_('{:2d} | fs sim sq {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | fs sq {:.4f} {:2d}th'.format(
ix, fs_sim_sq[ix], ranked_by_fs_sim_sq[ix], vloss_auc[ix], ranked_by_vloss[ix], fs_sq[ix], ranked_by_fs_sq[ix]))
In [7]:
graph_info('experiments/test2_modelgrusimple_mid-noise0.05-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [ ]:
# NOTE(review): same issue as the ranking cell above — these names are
# locals of graph_info and will NameError on a fresh kernel.
for i in six.moves.range(num_models):
ix = sorted_by_fs_sq[i]
six.print_('{:2d} | fs sim sq {:.1f} {:2d}th | vloss auc {:.5f} {:2d}th | fs sq {:.4f} {:2d}th'.format(
ix, fs_sim_sq[ix], ranked_by_fs_sim_sq[ix], vloss_auc[ix], ranked_by_vloss[ix], fs_sq[ix], ranked_by_fs_sq[ix]))
In [9]:
graph_info('experiments/test2_modelgrusimple_mid-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [10]:
graph_info('experiments/test2_modelgrusimple_mid-noise0.05-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [2]:
graph_info('experiments/test2_modelgrusimple_large-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle', 'runC')
In [16]:
graph_info('experiments/test2_modelgrusimple_large-noise0.05-dropout10-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [17]:
graph_info('experiments/test2_modelgrusimple_large-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [18]:
graph_info('experiments/test2_modelgrusimple_large-noise0.05-dropout10-outputdropout0.50-shuffle1-data-test2a-w4-n100000-l5-random.pickle')
In [ ]: