In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *
In [2]:
'''
Checking the policies and q-values of the learned models for dropout=0.8 and epoch60
'''
# Load the four runB result archives for the dropout8 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')
data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')
data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')
data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts20000-runB.npz')

# Per-model arrays; only column / slot 0 of the scores, q-values and q-functions is kept.
vloss = data11['vloss']
scores = data21['scores'][:,0]
initialq = data51['qvals'][:,0]
opts = data61['opts']
qfuncs = data61['qs'][:,0,:,:]

# Order the models from the highest to the lowest initial q-value and
# apply that same ordering to every per-model array.
sorted_score_ix = np.argsort(initialq)[::-1]
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix]
sorted_qfuncs = qfuncs[sorted_score_ix]

# One summary line per model, followed by its q-values for the first 6 steps.
for r in six.moves.range(scores.shape[0]):
    summary = '{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:])
    six.print_(summary)
    for t in six.moves.range(6):
        step_qs = ['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]]
        six.print_(' step {} qfunc [ {} ]'.format(t, ' '.join(step_qs)))
In [24]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
# Hand-picked positions into the sorted_* arrays built by the preceding cell.
good_ix = [10,13,26,27] # last steps end up being 2
final3 = [7,12,19,20,22,23] # last step should've been 3
final2 = [0,29] # last step should've been 2
# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good_ix, final2])
six.print_(model_ixs)
num_selected = model_ixs.shape[0]

# Last action of every selected model's policy (slot 0 of sorted_opts).
last_acts = sorted_opts[model_ixs, 0, -1]
for last_act in last_acts:
    six.print_(last_act)

# Last-step q-values of every selected model, one row per evaluating model.
last_step_qs = sorted_qfuncs[model_ixs, -1, :]

# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel, i.e. the q-value
# that cmodel's last-step q-function assigns to rmodel's last action.
rmat = np.zeros((num_selected, num_selected))
rmat[:, :] = last_step_qs[:, last_acts].T

six.print_(rmat)
#six.print_(np.min(rmat,axis=0))
# worst-case value of each policy across the evaluating models, as a column
six.print_(np.min(rmat,axis=1)[:,np.newaxis])
In [35]:
'''
Checking the policies and q-values of the learned models for dropout=1.0 and epoch13
'''
# Load the runA and runB result archives for the dropout10 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')
data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')
data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')
data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')

# Stack both runs so that every array has one entry per model.
vloss = np.concatenate([data11['vloss'],data12['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])

# This cell deliberately keeps the models in their original (unsorted) order.
# The original code first computed the descending-initialq ordering and then
# immediately overwrote it with np.arange(0,100); that dead computation is gone
# and the identity ordering is now sized from the data instead of hard-coding
# 100 models. The "sorted_" names are kept because the following cell indexes
# these arrays with hand-picked positions.
sorted_score_ix = np.arange(initialq.shape[0])
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix,:]
sorted_qfuncs = qfuncs[sorted_score_ix,:,:]

# One summary line per model, followed by its q-values for the first 6 steps.
for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:]))
    for t in six.moves.range(6):
        six.print_(' step {} qfunc [ {} ]'.format(t, ' '.join(['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]])))
In [36]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
# Hand-picked positions into the sorted_* arrays built by the preceding cell.
good2 = [5,13,34,46,62,84] # last steps end up being 2
good3 = [3,7,14,26,47,50,63,75] # last steps end up being 3
final2 = [4,11,16,17,20,29,33,39,43,52,55,66,67,93] # last step should've been 2
final3 = [6] # last step should've been 3
# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good2, final2])
six.print_(model_ixs)
num_selected = model_ixs.shape[0]

# Last action of every selected model's policy (slot 0 of sorted_opts).
last_acts = sorted_opts[model_ixs, 0, -1]
#six.print_(last_acts)
# Keep the name the loop version left behind: the final policy's last action.
last_act = last_acts[-1]

# Last-step q-values of every selected model, one row per evaluating model.
last_step_qs = sorted_qfuncs[model_ixs, -1, :]

# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel, i.e. the q-value
# that cmodel's last-step q-function assigns to rmodel's last action.
rmat = np.zeros((num_selected, num_selected))
rmat[:, :] = last_step_qs[:, last_acts].T

six.print_(rmat)
# average and worst-case value of each policy across the evaluating models
six.print_(np.mean(rmat,axis=1))
six.print_(np.min(rmat,axis=1))
In [37]:
'''
Checking the policies and q-values of the learned models for dropout=0.8 and epoch23
'''
# Load the runA, runC and runD result archives for the dropout8 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')
data13 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runD.npz')
data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')
data23 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real1-runD.npz')
data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')
data33 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real0-runD.npz')
data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runC.npz')
data43 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runD.npz')
data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')
data53 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runD.npz')
data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')
data63 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runD.npz')

# Stack the three runs (in A, C, D order) so every array has one entry per model.
vloss = np.concatenate([run['vloss'] for run in (data11, data12, data13)])
scores = np.concatenate([run['scores'][:,0] for run in (data21, data22, data23)])
#trueqvals = np.concatenate([data21['qvals'][:,0],data22['qvals'][:,0],data23['qvals'][:,0]])
#falseqvals = np.concatenate([data31['qvals'][:,0],data32['qvals'][:,0],data33['qvals'][:,0]])
rewards = np.concatenate([run['rewards'][:,0] for run in (data41, data42, data43)])
initialq = np.concatenate([run['qvals'][:,0] for run in (data51, data52, data53)])
opts = np.vstack([run['opts'] for run in (data61, data62, data63)])
qfuncs = np.vstack([run['qs'][:,0,:,:] for run in (data61, data62, data63)])

# Order the models from the highest to the lowest initial q-value and
# apply that same ordering to every per-model array.
sorted_score_ix = np.argsort(initialq)[::-1]
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix]
sorted_qfuncs = qfuncs[sorted_score_ix]

# One summary line per model, followed by its q-values for the first 6 steps.
for r in six.moves.range(scores.shape[0]):
    summary = '{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:])
    six.print_(summary)
    for t in six.moves.range(6):
        step_qs = ['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]]
        six.print_(' step {} qfunc [ {} ]'.format(t, ' '.join(step_qs)))
In [39]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
# Hand-picked positions into the sorted_* arrays built by the preceding cell.
good2 = [5,6,8,9,10,11,12,13,14,15,17,18,20,21,22,23,24,26,27,29,30,31,33,35,36,40,43,44,46,61,66,67,77,97] # last steps end up being 2
good3 = [4,89] # last steps end up being 3
final2 = [0,1,3,7,16,25,38,39,41,47,49,50,52,58,62,64,70,71,75,98] # last step should've been 2
final3 = [] # last step should've been 3
six.print_(len(good2))
six.print_(len(final2))
# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good2, final2])
six.print_(model_ixs)
num_selected = model_ixs.shape[0]

# Last action of every selected model's policy (slot 0 of sorted_opts).
last_acts = sorted_opts[model_ixs, 0, -1]
#six.print_(last_acts)
# Keep the name the loop version left behind: the final policy's last action.
last_act = last_acts[-1]

# Last-step q-values of every selected model, one row per evaluating model.
last_step_qs = sorted_qfuncs[model_ixs, -1, :]

# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel, i.e. the q-value
# that cmodel's last-step q-function assigns to rmodel's last action.
rmat = np.zeros((num_selected, num_selected))
rmat[:, :] = last_step_qs[:, last_acts].T

#six.print_(rmat)
# average and worst-case value of each policy across the evaluating models
six.print_(np.mean(rmat,axis=1))
six.print_(np.min(rmat,axis=1))
In [24]:
'''
Now let's look at the extended version with all 100 models for no dropout.
'''
# Load the runA and runB result archives for the dropout10 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')
data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')
data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')
data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')
data71 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runB.npz')

# Stack both runs so that every array has one entry per model.
vloss = np.concatenate([data11['vloss'],data12['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])[:,0,:]
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])
# each row is a policy
evals = np.vstack([data71['evals'],data72['evals']]).T
#six.print_(evals)

# Per-policy summaries of the evaluation matrix, each with a best-first ordering.
eval_avg = np.mean(evals,axis=1)
sorted_avg_ix = np.flip(np.argsort(eval_avg), 0)
eval_min = np.min(evals,axis=1)
sorted_min_ix = np.flip(np.argsort(eval_min), 0)
# np.percentile takes q on a 0-100 scale, so the lower quartile is q=25.
# The previous q=0.25 asked for the 0.25th percentile, i.e. nearly the minimum.
eval_per = np.percentile(evals,25,axis=1)
sorted_per_ix = np.flip(np.argsort(eval_per), 0)
#six.print_(sorted_avg_ix)
#six.print_(sorted_min_ix)

# Report every model, best lower-quartile evaluation first.
for r in six.moves.range(evals.shape[0]):
    ix = sorted_per_ix[r]
    six.print_('model_ix {:2d}: policy {} score {:.3f} initialq {:.3f} eval_avg {:.3f} eval_min {:.3f} per {:.3f}'.format(
        ix, opts[ix,:], scores[ix], initialq[ix], eval_avg[ix], eval_min[ix], eval_per[ix]))
In [28]:
'''
Now let's look at the extended version with all 100 models with dropout.
'''
# Load the runA, runC and runD result archives for the dropout8 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')
data13 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runD.npz')
data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')
data23 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real1-runD.npz')
data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')
data33 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real0-runD.npz')
data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runC.npz')
data43 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runD.npz')
data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')
data53 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runD.npz')
data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')
data63 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runD.npz')
data71 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runC.npz')
data73 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runD.npz')

# Stack the three runs (in A, C, D order) so every array has one entry per model.
vloss = np.concatenate([data11['vloss'],data12['vloss'],data13['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0],data23['scores'][:,0]])
rewards = np.concatenate([data41['rewards'][:,0],data42['rewards'][:,0],data43['rewards'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0],data53['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts'],data63['opts']])[:,0,:]
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:],data63['qs'][:,0,:,:]])
# each row is a policy
evals = np.vstack([data71['evals'],data72['evals'],data73['evals']]).T
#six.print_(evals)

# Per-policy summaries of the evaluation matrix, each with a best-first ordering.
eval_avg = np.mean(evals,axis=1)
sorted_avg_ix = np.flip(np.argsort(eval_avg), 0)
eval_min = np.min(evals,axis=1)
sorted_min_ix = np.flip(np.argsort(eval_min), 0)
# np.percentile takes q on a 0-100 scale, so the lower quartile is q=25.
# The previous q=0.25 asked for the 0.25th percentile, i.e. nearly the minimum.
eval_per = np.percentile(evals,25,axis=1)
sorted_per_ix = np.flip(np.argsort(eval_per), 0)
#six.print_(sorted_avg_ix)
#six.print_(sorted_min_ix)

# Report every model, best lower-quartile evaluation first.
for r in six.moves.range(evals.shape[0]):
    ix = sorted_per_ix[r]
    six.print_('model_ix {:2d}: policy {} score {:.3f} initialq {:.3f} eval_avg {:.3f} eval_min {:.3f} per {:.3f}'.format(
        ix, opts[ix,:], scores[ix], initialq[ix], eval_avg[ix], eval_min[ix], eval_per[ix]))
In [2]:
'''
Now let's look at proper RME with no dropout
'''
def get_ranks(sorted_indices):
    '''
    Convert an ordering into 1-based ranks.

    sorted_indices lists item indices from first place to last place (for
    example the output of np.argsort). The returned integer array has the
    same shape and holds, at position k, the 1-based place at which index k
    appears in sorted_indices.
    '''
    # The builtin int replaces the np.int alias, which NumPy deprecated in
    # 1.20 and removed in 1.24 (it was only ever an alias for int).
    ranks = np.zeros(sorted_indices.shape, dtype=int)
    for position, item_ix in enumerate(sorted_indices):
        ranks[item_ix] = position + 1
    return ranks
def array2str(arr):
    '''
    Format the values of arr with three decimals each, separated by single
    spaces and wrapped in square brackets.
    '''
    pieces = []
    for value in arr:
        pieces.append('{:.3f}'.format(value))
    return '[{}]'.format(' '.join(pieces))
# Load the runA and runB result archives for the dropout10 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')
data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')
data31 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runB.npz')
data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')
data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')
data71 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories200-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories200-runB.npz')

# each row is a real environment
raw_evals = np.vstack([run['evals'] for run in (data71, data72)]).T
# model index of every row of raw_evals (initially just 0..n-1)
eval_ixs = np.arange(raw_evals.shape[0])

# Stack both runs so that every array has one entry per model.
vloss = np.concatenate([run['vloss'] for run in (data11, data12)])[:,-1]  # last vloss column only
scores = np.concatenate([run['scores'][:,0] for run in (data21, data22)])
behavior = np.concatenate([run['rewards'][:,0] for run in (data31, data32)]) / 4.0
initialq = np.concatenate([run['qvals'][:,0] for run in (data51, data52)])
opts = np.vstack([run['opts'] for run in (data61, data62)])[:,0,:]
#qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])
def normalizeRME(raw_evals):
    '''
    Re-centre every column of the evaluation matrix on the overall mean.

    The mean of the whole matrix is computed, then a constant is added to
    each column (the means are taken along axis 0) so that every column's
    mean equals that overall mean. The input is not modified; a new array of
    the same shape is returned.
    '''
    column_means = np.mean(raw_evals, axis=0)
    offsets = np.mean(raw_evals) - column_means
    # broadcast one offset per column down all rows
    return raw_evals + offsets[None, :]
def printmatrixs(es,ixs,scores,behavior,shift=False):
    '''
    Print an evaluation matrix one row at a time.

    es       -- evaluation matrix; row r belongs to the model ixs[r]
    ixs      -- model index of each row of es
    scores   -- per-model scores, indexed by model index
    behavior -- per-model behavior values, indexed by model index
    shift    -- when True the matrix is passed through normalizeRME first;
                otherwise it is printed as given

    Each printed line shows the model index, its score, its behavior value
    and the corresponding matrix row.
    '''
    temp_es = normalizeRME(es) if shift else es
    for r in six.moves.range(ixs.shape[0]):
        ix = ixs[r]
        # behavior now uses {:.3f} like score; the former {:3f} meant
        # "minimum width 3" and printed six decimals.
        six.print_('policy model_ix {:2d}: score {:.3f} behavior {:.3f} {}'.format(
            ix, scores[ix], behavior[ix], temp_es[r,:]))
def computemetric(es):
    '''
    Score every policy (row) of a square evaluation matrix.

    The matrix is first normalized with normalizeRME. The metric of a row is
    the average of its entries excluding the diagonal one, i.e. ignoring the
    model's evaluation of its own policy. Returns a 1-D array with one value
    per row; higher is treated as better by the callers in this notebook.

    Previously the diagonal was zeroed but the mean was still taken over all
    n columns, so the self-evaluation was counted as 0 instead of being left
    out. Dividing by n-1 gives the true off-diagonal average; the relative
    ordering of the rows is unchanged.
    '''
    # compute the metric
    # currently using average eval
    temp_es = normalizeRME(es)
    num_models = temp_es.shape[0]
    # ignore self predictions completely
    # unfortunately doesn't seem to make that big of a difference
    temp_es_other = temp_es * (1.0 - np.eye(num_models))
    if num_models > 1:
        metric = np.sum(temp_es_other,axis=1) / (num_models - 1)
    else:
        # zero or one model: there are no other models to average over, so
        # keep the previous result (all zeros / empty) rather than divide by 0
        metric = np.mean(temp_es_other,axis=1)
    # alternatives that were tried instead of the average:
    #metric = np.min(es,axis=1)
    #metric = np.percentile(es,0.5,axis=1)
    #metric = np.std(es,axis=0)
    return metric
# try removing some of them
def remove_worst(es,ixs):
    '''
    Drop the model with the lowest computemetric value from the matrix.

    es  -- square evaluation matrix
    ixs -- model index of each row/column of es

    Prints which model is removed, together with the current best and (when
    there is more than one model) second-best model, then returns the matrix
    with that model's row and column removed and the matching shortened ixs.
    The printed score / behavior values are read from the notebook-level
    `scores` and `behavior` arrays.
    '''
    # The metric is computed once; the original evaluated computemetric twice
    # on the same matrix and sorted both identical results.
    metric = computemetric(es)
    metricix = np.flip(np.argsort(metric), 0)  # best first
    #metricix = np.argsort(metric)
    worst_ix = metricix[-1]
    best_ix = metricix[0]
    # behavior uses {:.3f}; the former {:3f} meant "minimum width 3" and
    # printed six decimals.
    six.print_('Removing worst ix {:2d}: score {:.4f} behavior {:.3f}'.format(
        ixs[worst_ix], scores[ixs[worst_ix]], behavior[ixs[worst_ix]]))
    six.print_(' = Current best ix {:2d}: score {:.4f} behavior {:.3f}'.format(
        ixs[best_ix], scores[ixs[best_ix]], behavior[ixs[best_ix]]))
    if metricix.shape[0] > 1:
        best_ix2 = metricix[1]
        six.print_(' = Current 2nd best ix {:2d}: score {:.4f} behavior {:.3f}'.format(
            ixs[best_ix2], scores[ixs[best_ix2]], behavior[ixs[best_ix2]]))
    # keep every row and column except the worst model's
    mask = np.ones(es.shape[0],dtype=bool)
    mask[worst_ix] = False
    es = es[mask,:]
    es = es[:,mask]
    ixs = ixs[mask]
    return es, ixs
# analyzeRME: print and plot a summary of an evaluation matrix, then repeatedly
# prune the worst model with remove_worst and print what is left.
#   raw_evals -- evaluation matrix; rows are averaged to rank the models
#   eval_ixs  -- model index belonging to each row of raw_evals
#   scores, behavior -- per-model arrays, plotted against each other and
#                       forwarded to printmatrixs
#   vloss     -- only referenced inside a disabled `if False:` block
#   initialq  -- not referenced anywhere in the body
# NOTE(review): indentation was lost in this export, so where the `for` loop
# body and the two `if False:` blocks end cannot be told from here; the
# section comments below mark the most plausible reading - confirm against the
# original notebook.
def analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq):
# show initial average means
# NOTE(review): normalization is commented out, so the ranking below uses the
# raw matrix even though the printed label still says "(normalized)".
#temp_es = normalizeRME(raw_evals)
temp_es = raw_evals
metric = np.mean(temp_es,axis=1)
# row indices ordered from highest to lowest row mean
metricix = np.flip(np.argsort(metric), 0)
six.print_('Initial models ordered by average eval (normalized): {}'.format(metricix))
six.print_('Corresponding average evals: {}'.format(metric[metricix]))
# look at correlation between behavior and scores
# (figure/title/plot/xlabel/ylabel come from the star import of matplotlib.pyplot)
figure()
title('rtype 2')
plot(behavior,scores,'.',color='#0000ff')
xlabel('behavior')
ylabel('scores')
# initial matrix
six.print_('Initial matrix limited to the top 6 models')
top6 = metricix[:6]
# restrict both rows and columns to the six best-ranked models
raw_evals2 = raw_evals[top6,:]
raw_evals2 = raw_evals2[:,top6]
eval_ixs2 = eval_ixs[top6]
six.print_('Normalized:')
printmatrixs(raw_evals2, eval_ixs2, scores, behavior, shift=True)
six.print_('Original:')
printmatrixs(raw_evals2, eval_ixs2, scores, behavior, shift=False)
# Prune the worst model 38 times.
# NOTE(review): 38 is hard-coded; with fewer than 38 models remove_worst would
# presumably be called on an empty matrix and fail - confirm the model count.
for i in six.moves.range(38):
raw_evals, eval_ixs = remove_worst(raw_evals, eval_ixs)
#if i > 35 and i < 39:
# printmatrixs(raw_evals, eval_ixs, scores)
# NOTE(review): presumably the four lines below run once after the loop and
# show the surviving models - cannot be confirmed without the indentation.
six.print_('Normalized:')
printmatrixs(raw_evals, eval_ixs, scores, behavior, shift=True)
six.print_('Original:')
printmatrixs(raw_evals, eval_ixs, scores, behavior, shift=False)
#six.print_('Raw evals shape {}'.format(raw_evals.shape))
#six.print_('Raw evals ixs {}'.format(eval_ixs))
num_models = raw_evals.shape[0]
#sorted_score_eix = np.flip(np.argsort(scores[eval_ixs]), 0)
#sorted_score_ix = eval_ixs[sorted_score_eix]
#six.print_('Indices sorted by score {}'.format(sorted_score_ix))
# use the top few vloss models to evaluate
# Disabled block (`if False:`). NOTE(review): it reads sorted_score_ix, which is
# not assigned anywhere in this function (its only assignment here is commented
# out), so enabling it would depend on a notebook-level variable.
if False:
sorted_vloss = vloss[sorted_score_ix,-1]
sorted_vloss_ix = np.argsort(sorted_vloss)
ranked_vloss_ix = get_ranks(sorted_vloss_ix)
topmodels = sorted_vloss_ix[:10]
raw_evals = raw_evals[:,topmodels]
six.print_('Sorted Validation Loss {}'.format(sorted_vloss))
six.print_('Sorted Validation Loss Model Indices {}'.format(sorted_vloss_ix))
six.print_(ranked_vloss_ix)
# Disabled block (`if False:`): alternative per-row rankings (avg, min,
# percentile, max, std) with the scores printed in each order.
if False:
eval_avg = np.mean(raw_evals,axis=1)
sorted_avg_eix = np.flip(np.argsort(eval_avg), 0)
sorted_avg_ix = eval_ixs[sorted_avg_eix]
ranked_avg_eix = get_ranks(sorted_avg_eix)
eval_min = np.min(raw_evals,axis=1)
sorted_min_eix = np.flip(np.argsort(eval_min), 0)
sorted_min_ix = eval_ixs[sorted_min_eix]
ranked_min_eix = get_ranks(sorted_min_eix)
# NOTE(review): np.percentile takes q on a 0-100 scale, so 0.25 is the 0.25th
# percentile, not the 25% that the label printed below announces.
eval_per = np.percentile(raw_evals,0.25,axis=1)
sorted_per_eix = np.flip(np.argsort(eval_per), 0)
sorted_per_ix = eval_ixs[sorted_per_eix]
ranked_per_eix = get_ranks(sorted_per_eix)
eval_max = np.max(raw_evals,axis=1)
sorted_max_eix = np.flip(np.argsort(eval_max), 0)
sorted_max_ix = eval_ixs[sorted_max_eix]
ranked_max_eix = get_ranks(sorted_max_eix)
six.print_('Sorted by avg eval')
six.print_('ixs: {}'.format(array2str(sorted_avg_ix)))
six.print_('scores: {}'.format(array2str(scores[sorted_avg_ix])))
six.print_('Sorted by min eval')
six.print_('scores: {}'.format(array2str(scores[sorted_min_ix])))
six.print_('Sorted by 25% per eval')
six.print_('scores: {}'.format(array2str(scores[sorted_per_ix])))
six.print_('Sorted by max eval')
six.print_('scores: {}'.format(array2str(scores[sorted_max_ix])))
# spread along axis 1 (within each row), smallest first
eval_std = np.std(raw_evals,axis=1)
sorted_std_eix = np.argsort(eval_std)
sorted_std_ix = eval_ixs[sorted_std_eix]
ranked_std_eix = get_ranks(sorted_std_eix)
six.print_('Sorted by smallest std of evals by other models')
six.print_('scores: {}'.format(array2str(scores[sorted_std_ix])))
# spread along axis 0 (within each column), smallest first
eval_stdt = np.std(raw_evals,axis=0)
sorted_stdt_eix = np.argsort(eval_stdt)
sorted_stdt_ix = eval_ixs[sorted_stdt_eix]
ranked_stdt_eix = get_ranks(sorted_stdt_eix)
six.print_('Sorted by smallest std of own evals')
six.print_('scores: {}'.format(array2str(scores[sorted_stdt_ix])))
# Run the analysis on the no-dropout arrays loaded earlier in this cell.
analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq)
In [3]:
'''
Now let's look at proper RME with dropout
'''
def get_ranks(sorted_indices):
    '''
    Convert an ordering into 1-based ranks.

    sorted_indices lists item indices from first place to last place (for
    example the output of np.argsort). The returned integer array has the
    same shape and holds, at position k, the 1-based place at which index k
    appears in sorted_indices.
    '''
    # The builtin int replaces the np.int alias, which NumPy deprecated in
    # 1.20 and removed in 1.24 (it was only ever an alias for int).
    ranks = np.zeros(sorted_indices.shape, dtype=int)
    for position, item_ix in enumerate(sorted_indices):
        ranks[item_ix] = position + 1
    return ranks
# Load the runA and runC result archives for the dropout8 experiment directory.
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')
data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')
data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')
data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runC.npz')
data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')
data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')
data71 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories100-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories100-runC.npz')

# each row is a real environment
raw_evals = np.vstack([run['evals'] for run in (data71, data72)]).T
# model index of every row of raw_evals (initially just 0..n-1)
eval_ixs = np.arange(raw_evals.shape[0])

# Stack both runs so that every array has one entry per model.
vloss = np.concatenate([run['vloss'] for run in (data11, data12)])[:,-1]  # last vloss column only
scores = np.concatenate([run['scores'][:,0] for run in (data21, data22)])
behavior = np.concatenate([run['rewards'][:,0] for run in (data41, data42)]) / 4.0
initialq = np.concatenate([run['qvals'][:,0] for run in (data51, data52)])
opts = np.vstack([run['opts'] for run in (data61, data62)])[:,0,:]

# Same analysis as the no-dropout cell, now on the dropout models.
analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq)
In [ ]: