In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *

In [2]:
'''
Checking the policies and q-values of the learned models for dropout=0.8 and epoch60
'''
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts20000-runB.npz')

vloss = data11['vloss']
scores = data21['scores'][:,0]
initialq = data51['qvals'][:,0]
opts = data61['opts']
qfuncs = data61['qs'][:,0,:,:]

sorted_score_ix = np.flip(np.argsort(initialq), 0)
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix,:]
sorted_qfuncs = qfuncs[sorted_score_ix,:,:]

for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:]))
    for t in six.moves.range(6):
        six.print_('  step {} qfunc [ {} ]'.format(t, ' '.join(['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]])))
    pass


 0: score 0.750 initialq 3.46 opt [[2 1 1 3 3 0]]
  step 0 qfunc [ 2.24 2.53 3.41 2.27 ]
  step 1 qfunc [ 2.33 3.44 2.34 2.38 ]
  step 2 qfunc [ 2.54 3.46 2.42 2.52 ]
  step 3 qfunc [ 2.79 3.15 2.44 3.50 ]
  step 4 qfunc [ 3.51 3.45 2.45 3.51 ]
  step 5 qfunc [ 3.52 3.31 3.52 3.51 ]
 1: score 0.500 initialq 3.45 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.25 3.39 2.33 2.24 ]
  step 1 qfunc [ 2.46 3.43 2.62 2.50 ]
  step 2 qfunc [ 2.69 2.82 2.69 3.48 ]
  step 3 qfunc [ 3.38 3.28 3.50 3.47 ]
  step 4 qfunc [ 3.51 3.51 3.49 3.50 ]
  step 5 qfunc [ 3.52 3.52 3.52 3.21 ]
 2: score 0.539 initialq 3.45 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.20 3.38 2.41 2.19 ]
  step 1 qfunc [ 2.27 3.42 2.61 2.40 ]
  step 2 qfunc [ 2.57 2.69 2.54 3.47 ]
  step 3 qfunc [ 3.27 3.35 3.49 3.23 ]
  step 4 qfunc [ 3.48 3.50 3.00 3.46 ]
  step 5 qfunc [ 3.52 3.45 3.51 3.29 ]
 3: score 0.500 initialq 3.44 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.24 3.37 2.29 2.23 ]
  step 1 qfunc [ 2.40 3.42 2.48 2.39 ]
  step 2 qfunc [ 2.39 2.52 2.58 3.48 ]
  step 3 qfunc [ 3.10 3.19 3.49 2.77 ]
  step 4 qfunc [ 3.50 3.50 3.15 3.40 ]
  step 5 qfunc [ 3.51 3.51 3.29 3.06 ]
 4: score 0.750 initialq 3.42 opt [[1 1 3 2 1 2]]
  step 0 qfunc [ 2.21 3.36 2.35 2.26 ]
  step 1 qfunc [ 2.38 3.40 2.49 2.31 ]
  step 2 qfunc [ 2.39 2.67 2.86 3.47 ]
  step 3 qfunc [ 3.03 3.27 3.49 3.29 ]
  step 4 qfunc [ 3.44 3.50 3.41 3.28 ]
  step 5 qfunc [ 3.51 3.48 3.51 2.62 ]
 5: score 0.500 initialq 3.41 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.27 3.31 2.31 2.08 ]
  step 1 qfunc [ 2.45 3.37 2.58 2.40 ]
  step 2 qfunc [ 2.62 2.61 3.12 3.45 ]
  step 3 qfunc [ 3.02 2.88 3.47 2.93 ]
  step 4 qfunc [ 3.48 3.23 3.30 3.27 ]
  step 5 qfunc [ 3.49 3.26 3.38 3.27 ]
 6: score 0.500 initialq 3.40 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.25 3.32 2.33 2.15 ]
  step 1 qfunc [ 2.37 3.38 2.38 2.29 ]
  step 2 qfunc [ 2.54 2.68 2.69 3.45 ]
  step 3 qfunc [ 3.00 3.09 3.46 3.20 ]
  step 4 qfunc [ 3.32 3.47 3.40 3.41 ]
  step 5 qfunc [ 3.49 3.46 3.48 2.71 ]
 7: score 0.682 initialq 3.40 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.25 3.34 2.40 2.17 ]
  step 1 qfunc [ 2.43 2.55 3.38 2.35 ]
  step 2 qfunc [ 2.61 3.40 2.50 2.59 ]
  step 3 qfunc [ 2.96 2.99 2.50 3.44 ]
  step 4 qfunc [ 3.44 3.18 3.45 3.40 ]
  step 5 qfunc [ 3.46 2.90 3.44 3.16 ]
 8: score 0.596 initialq 3.38 opt [[1 2 1 3 1 2]]
  step 0 qfunc [ 2.22 3.33 2.42 2.24 ]
  step 1 qfunc [ 2.48 2.87 3.36 2.47 ]
  step 2 qfunc [ 2.57 3.38 2.57 2.69 ]
  step 3 qfunc [ 2.80 3.15 2.53 3.43 ]
  step 4 qfunc [ 3.43 3.44 3.42 3.31 ]
  step 5 qfunc [ 3.44 3.44 3.46 3.26 ]
 9: score 0.500 initialq 3.38 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.26 3.29 2.35 2.35 ]
  step 1 qfunc [ 2.50 3.35 2.63 2.49 ]
  step 2 qfunc [ 2.68 2.69 2.87 3.42 ]
  step 3 qfunc [ 3.08 3.12 3.44 2.90 ]
  step 4 qfunc [ 3.45 3.37 3.20 3.38 ]
  step 5 qfunc [ 3.46 3.42 3.20 2.82 ]
10: score 1.000 initialq 3.38 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.25 2.50 3.32 2.23 ]
  step 1 qfunc [ 2.34 3.36 2.26 2.32 ]
  step 2 qfunc [ 2.58 3.38 2.54 2.62 ]
  step 3 qfunc [ 2.51 2.81 2.54 3.42 ]
  step 4 qfunc [ 3.25 3.21 3.19 3.44 ]
  step 5 qfunc [ 2.57 2.54 3.46 2.53 ]
11: score 0.500 initialq 3.38 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.23 3.29 2.37 2.22 ]
  step 1 qfunc [ 2.36 3.35 2.66 2.42 ]
  step 2 qfunc [ 2.72 2.62 2.69 3.41 ]
  step 3 qfunc [ 3.08 2.90 3.43 3.16 ]
  step 4 qfunc [ 3.45 3.39 3.38 3.39 ]
  step 5 qfunc [ 3.47 3.38 3.36 3.30 ]
12: score 0.760 initialq 3.36 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.20 3.30 2.32 2.16 ]
  step 1 qfunc [ 2.29 2.65 3.34 2.24 ]
  step 2 qfunc [ 2.44 3.36 2.59 2.50 ]
  step 3 qfunc [ 2.81 2.78 3.04 3.42 ]
  step 4 qfunc [ 3.36 3.34 3.43 2.88 ]
  step 5 qfunc [ 3.47 3.46 3.44 3.43 ]
13: score 0.971 initialq 3.36 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.26 3.29 2.39 2.30 ]
  step 1 qfunc [ 2.37 2.49 3.35 2.42 ]
  step 2 qfunc [ 2.45 3.37 2.50 2.55 ]
  step 3 qfunc [ 2.53 2.75 2.47 3.40 ]
  step 4 qfunc [ 3.31 3.21 2.15 3.42 ]
  step 5 qfunc [ 2.64 2.58 3.51 2.53 ]
14: score 0.521 initialq 3.36 opt [[1 2 1 3 0 0]]
  step 0 qfunc [ 2.22 3.28 2.46 2.26 ]
  step 1 qfunc [ 2.46 2.72 3.33 2.43 ]
  step 2 qfunc [ 2.60 3.36 2.62 2.70 ]
  step 3 qfunc [ 3.21 3.05 3.16 3.38 ]
  step 4 qfunc [ 3.39 3.20 3.35 3.35 ]
  step 5 qfunc [ 3.40 3.30 3.27 2.91 ]
15: score 0.500 initialq 3.35 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.24 3.27 2.36 2.23 ]
  step 1 qfunc [ 2.44 3.32 2.48 2.47 ]
  step 2 qfunc [ 2.67 2.74 2.57 3.40 ]
  step 3 qfunc [ 3.13 3.21 3.42 3.03 ]
  step 4 qfunc [ 3.35 3.43 3.31 3.25 ]
  step 5 qfunc [ 3.45 3.40 3.37 3.37 ]
16: score 0.521 initialq 3.35 opt [[1 2 1 3 0 2]]
  step 0 qfunc [ 2.17 3.26 2.38 2.13 ]
  step 1 qfunc [ 2.38 2.55 3.30 2.35 ]
  step 2 qfunc [ 2.48 3.32 2.49 2.41 ]
  step 3 qfunc [ 2.64 2.66 2.51 3.43 ]
  step 4 qfunc [ 3.44 3.21 3.17 3.18 ]
  step 5 qfunc [ 3.44 3.34 3.45 2.61 ]
17: score 0.521 initialq 3.35 opt [[1 1 3 2 1 1]]
  step 0 qfunc [ 2.20 3.29 2.43 2.27 ]
  step 1 qfunc [ 2.53 3.33 2.64 2.40 ]
  step 2 qfunc [ 2.66 2.60 2.64 3.40 ]
  step 3 qfunc [ 3.21 3.21 3.42 2.91 ]
  step 4 qfunc [ 3.42 3.43 3.39 3.03 ]
  step 5 qfunc [ 3.44 3.45 3.45 3.34 ]
18: score 0.500 initialq 3.33 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.27 3.26 2.39 2.22 ]
  step 1 qfunc [ 2.44 3.30 2.55 2.46 ]
  step 2 qfunc [ 2.68 2.72 2.74 3.35 ]
  step 3 qfunc [ 3.01 3.20 3.36 3.05 ]
  step 4 qfunc [ 3.31 3.38 3.34 3.35 ]
  step 5 qfunc [ 3.41 3.38 3.37 2.97 ]
19: score 0.740 initialq 3.32 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.14 3.26 2.44 2.13 ]
  step 1 qfunc [ 2.44 2.70 3.30 2.43 ]
  step 2 qfunc [ 2.54 3.32 2.50 2.60 ]
  step 3 qfunc [ 2.85 3.05 2.66 3.38 ]
  step 4 qfunc [ 3.37 3.31 3.40 3.35 ]
  step 5 qfunc [ 3.41 3.20 3.38 3.28 ]
20: score 0.750 initialq 3.32 opt [[1 1 3 2 2 0]]
  step 0 qfunc [ 2.21 3.26 2.38 2.25 ]
  step 1 qfunc [ 2.46 3.30 2.58 2.43 ]
  step 2 qfunc [ 2.65 2.50 2.58 3.35 ]
  step 3 qfunc [ 3.13 3.25 3.37 2.65 ]
  step 4 qfunc [ 3.36 3.35 3.38 3.17 ]
  step 5 qfunc [ 3.39 3.38 3.39 3.13 ]
21: score 0.500 initialq 3.32 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.24 3.26 2.32 2.19 ]
  step 1 qfunc [ 2.45 3.30 2.51 2.41 ]
  step 2 qfunc [ 2.69 2.73 2.50 3.35 ]
  step 3 qfunc [ 3.18 3.15 3.37 2.81 ]
  step 4 qfunc [ 3.37 3.38 3.32 3.18 ]
  step 5 qfunc [ 3.39 3.39 3.36 3.29 ]
22: score 0.750 initialq 3.32 opt [[1 1 3 2 2 0]]
  step 0 qfunc [ 2.26 3.24 2.43 2.21 ]
  step 1 qfunc [ 2.40 3.29 2.57 2.39 ]
  step 2 qfunc [ 2.68 2.59 2.73 3.34 ]
  step 3 qfunc [ 3.09 3.08 3.36 2.85 ]
  step 4 qfunc [ 3.34 3.36 3.37 3.33 ]
  step 5 qfunc [ 3.40 3.34 3.22 3.21 ]
23: score 0.750 initialq 3.31 opt [[2 1 1 3 2 0]]
  step 0 qfunc [ 2.32 2.56 3.22 2.23 ]
  step 1 qfunc [ 2.34 3.27 2.22 2.27 ]
  step 2 qfunc [ 2.55 3.29 2.56 2.62 ]
  step 3 qfunc [ 2.97 2.71 3.07 3.39 ]
  step 4 qfunc [ 3.23 3.16 3.40 2.99 ]
  step 5 qfunc [ 3.44 3.31 3.36 2.67 ]
24: score 0.500 initialq 3.31 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.28 3.26 2.40 2.25 ]
  step 1 qfunc [ 2.41 3.29 2.54 2.41 ]
  step 2 qfunc [ 2.63 2.59 2.50 3.37 ]
  step 3 qfunc [ 3.33 3.01 3.39 2.75 ]
  step 4 qfunc [ 3.40 3.40 3.37 3.17 ]
  step 5 qfunc [ 3.43 3.42 3.41 3.30 ]
25: score 0.768 initialq 3.27 opt [[1 1 1 1 3 2]]
  step 0 qfunc [ 2.23 3.19 2.28 2.22 ]
  step 1 qfunc [ 2.28 3.28 2.33 2.25 ]
  step 2 qfunc [ 2.37 3.35 2.35 2.62 ]
  step 3 qfunc [ 2.32 3.40 2.37 3.32 ]
  step 4 qfunc [ 2.25 2.24 2.40 3.48 ]
  step 5 qfunc [ 2.62 2.55 3.48 2.57 ]
26: score 1.000 initialq 3.27 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.31 3.18 2.37 2.27 ]
  step 1 qfunc [ 2.39 3.25 2.50 2.41 ]
  step 2 qfunc [ 2.56 2.54 3.36 2.73 ]
  step 3 qfunc [ 2.39 2.92 2.39 3.37 ]
  step 4 qfunc [ 3.00 2.85 2.94 3.38 ]
  step 5 qfunc [ 2.74 2.60 3.52 2.57 ]
27: score 1.000 initialq 3.27 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.22 3.18 2.36 2.14 ]
  step 1 qfunc [ 2.43 3.23 2.48 2.37 ]
  step 2 qfunc [ 2.44 2.52 2.61 3.35 ]
  step 3 qfunc [ 3.14 3.08 3.36 2.90 ]
  step 4 qfunc [ 3.38 3.31 3.34 3.38 ]
  step 5 qfunc [ 2.71 2.53 3.39 2.53 ]
28: score 0.557 initialq 3.26 opt [[1 2 1 3 1 2]]
  step 0 qfunc [ 2.18 3.19 2.44 2.25 ]
  step 1 qfunc [ 2.46 2.73 3.24 2.41 ]
  step 2 qfunc [ 2.51 3.25 2.56 2.58 ]
  step 3 qfunc [ 2.60 3.01 2.59 3.36 ]
  step 4 qfunc [ 3.21 3.38 3.14 3.17 ]
  step 5 qfunc [ 3.37 3.36 3.41 3.00 ]
29: score 0.742 initialq 3.18 opt [[1 3 1 3 2 0]]
  step 0 qfunc [ 2.25 3.17 2.46 2.27 ]
  step 1 qfunc [ 2.47 2.66 2.61 3.27 ]
  step 2 qfunc [ 2.37 3.29 2.50 2.35 ]
  step 3 qfunc [ 2.42 2.62 2.58 3.39 ]
  step 4 qfunc [ 2.97 2.76 3.42 2.72 ]
  step 5 qfunc [ 3.44 3.18 2.82 3.05 ]

In [24]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
good_ix = [10,13,26,27] # last steps end up being 2
final3 = [7,12,19,20,22,23] # last step should've been 3
final2 = [0,29] # last step should've been 2
# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good_ix, final2])
six.print_(model_ixs)
rmat = np.zeros((model_ixs.shape[0],model_ixs.shape[0]))
# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel
for pix in six.moves.range(model_ixs.shape[0]):
    policy = sorted_opts[model_ixs[pix],0,:]
    last_act = policy[-1]
    six.print_(last_act)
    for eix in six.moves.range(model_ixs.shape[0]):
        # qfunc of last step
        last_q = sorted_qfuncs[model_ixs[eix],-1,:]
        #six.print_(last_q)
        rmat[pix,eix] = last_q[last_act]
six.print_(rmat)
#six.print_(np.min(rmat,axis=0))
six.print_(np.min(rmat,axis=1)[:,np.newaxis])


[10 13 26 27  0 29]
2
2
2
2
0
0
[[ 3.46448722  3.5056317   3.51720442  3.39157984  3.51887841  2.82469152]
 [ 3.46448722  3.5056317   3.51720442  3.39157984  3.51887841  2.82469152]
 [ 3.46448722  3.5056317   3.51720442  3.39157984  3.51887841  2.82469152]
 [ 3.46448722  3.5056317   3.51720442  3.39157984  3.51887841  2.82469152]
 [ 2.57431363  2.64013803  2.73524159  2.71147496  3.52054915  3.43583114]
 [ 2.57431363  2.64013803  2.73524159  2.71147496  3.52054915  3.43583114]]
[[ 2.82469152]
 [ 2.82469152]
 [ 2.82469152]
 [ 2.82469152]
 [ 2.57431363]
 [ 2.57431363]]

In [35]:
'''
Checking the policies and q-values of the learned models for dropout=1.0 and epoch13
'''
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')

vloss = np.concatenate([data11['vloss'],data12['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])

sorted_score_ix = np.flip(np.argsort(initialq), 0)
sorted_score_ix = np.arange(0,100)
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix,:]
sorted_qfuncs = qfuncs[sorted_score_ix,:,:]

for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:]))
    for t in six.moves.range(6):
        six.print_('  step {} qfunc [ {} ]'.format(t, ' '.join(['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]])))
    pass


 0: score 0.500 initialq 3.97 opt [[1 2 1 3 0 0]]
  step 0 qfunc [ 2.47 3.91 3.15 2.86 ]
  step 1 qfunc [ 3.12 3.50 3.95 2.97 ]
  step 2 qfunc [ 3.23 3.96 3.22 3.07 ]
  step 3 qfunc [ 3.94 3.94 3.94 3.99 ]
  step 4 qfunc [ 3.99 3.99 3.99 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
 1: score 0.755 initialq 3.96 opt [[2 1 3 1 3 3]]
  step 0 qfunc [ 2.74 3.14 3.92 2.54 ]
  step 1 qfunc [ 2.72 3.94 2.77 2.57 ]
  step 2 qfunc [ 3.19 3.95 3.07 3.95 ]
  step 3 qfunc [ 3.11 3.97 2.92 2.96 ]
  step 4 qfunc [ 3.97 3.96 3.30 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
 2: score 0.750 initialq 3.96 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.67 3.89 2.96 2.83 ]
  step 1 qfunc [ 3.16 3.35 3.94 3.09 ]
  step 2 qfunc [ 3.10 3.83 2.99 3.96 ]
  step 3 qfunc [ 2.91 3.97 2.99 2.95 ]
  step 4 qfunc [ 3.97 3.88 3.00 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.99 3.97 ]
 3: score 0.953 initialq 3.96 opt [[1 2 1 3 2 3]]
  step 0 qfunc [ 2.62 3.89 2.94 2.72 ]
  step 1 qfunc [ 2.91 3.34 3.93 2.80 ]
  step 2 qfunc [ 3.27 3.95 3.27 3.05 ]
  step 3 qfunc [ 3.29 3.96 3.23 3.97 ]
  step 4 qfunc [ 3.97 3.98 3.98 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.98 3.99 ]
 4: score 0.880 initialq 3.96 opt [[1 1 3 2 3 3]]
  step 0 qfunc [ 2.62 3.83 2.97 2.77 ]
  step 1 qfunc [ 3.11 3.90 3.21 2.98 ]
  step 2 qfunc [ 3.26 3.36 3.46 3.95 ]
  step 3 qfunc [ 3.48 3.50 3.96 3.96 ]
  step 4 qfunc [ 3.97 3.91 3.96 3.99 ]
  step 5 qfunc [ 3.98 3.99 3.99 3.99 ]
 5: score 0.977 initialq 3.96 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.63 3.89 3.22 2.47 ]
  step 1 qfunc [ 3.06 3.27 3.93 3.11 ]
  step 2 qfunc [ 3.33 3.95 3.24 3.49 ]
  step 3 qfunc [ 3.95 3.94 3.92 3.98 ]
  step 4 qfunc [ 3.98 3.99 3.95 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
 6: score 0.750 initialq 3.96 opt [[1 2 1 3 2 2]]
  step 0 qfunc [ 2.48 3.89 3.03 2.69 ]
  step 1 qfunc [ 3.10 3.27 3.93 2.88 ]
  step 2 qfunc [ 3.18 3.95 3.17 3.21 ]
  step 3 qfunc [ 3.86 3.95 3.40 3.97 ]
  step 4 qfunc [ 3.98 3.94 3.98 3.98 ]
  step 5 qfunc [ 3.99 3.96 3.99 3.99 ]
 7: score 0.880 initialq 3.96 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.82 3.78 2.96 2.68 ]
  step 1 qfunc [ 2.87 3.88 3.18 2.81 ]
  step 2 qfunc [ 3.16 3.07 3.35 3.93 ]
  step 3 qfunc [ 3.88 3.65 3.96 3.91 ]
  step 4 qfunc [ 3.63 3.64 3.98 3.96 ]
  step 5 qfunc [ 3.03 3.15 3.04 3.99 ]
 8: score 0.773 initialq 3.96 opt [[1 2 0 1 3 3]]
  step 0 qfunc [ 2.53 3.84 2.86 2.71 ]
  step 1 qfunc [ 3.00 3.13 3.91 3.02 ]
  step 2 qfunc [ 3.94 3.56 3.23 3.29 ]
  step 3 qfunc [ 3.18 3.96 3.25 3.36 ]
  step 4 qfunc [ 3.03 3.96 3.01 3.98 ]
  step 5 qfunc [ 3.97 3.72 3.98 3.99 ]
 9: score 0.750 initialq 3.96 opt [[1 2 3 1 3 3]]
  step 0 qfunc [ 2.77 3.86 2.93 2.52 ]
  step 1 qfunc [ 2.84 2.96 3.92 2.70 ]
  step 2 qfunc [ 3.30 3.55 3.28 3.95 ]
  step 3 qfunc [ 3.36 3.96 3.33 3.50 ]
  step 4 qfunc [ 3.53 3.93 3.08 3.97 ]
  step 5 qfunc [ 3.86 3.86 3.98 3.99 ]
10: score 0.750 initialq 3.96 opt [[1 2 1 3 0 2]]
  step 0 qfunc [ 2.56 3.90 3.18 2.72 ]
  step 1 qfunc [ 3.18 3.41 3.93 2.98 ]
  step 2 qfunc [ 3.37 3.95 3.35 3.55 ]
  step 3 qfunc [ 3.93 3.94 3.94 3.98 ]
  step 4 qfunc [ 3.98 3.97 3.98 3.98 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.97 ]
11: score 0.747 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.78 3.88 2.99 2.76 ]
  step 1 qfunc [ 2.91 3.14 3.92 2.99 ]
  step 2 qfunc [ 3.24 3.94 3.21 3.91 ]
  step 3 qfunc [ 3.95 3.91 3.92 3.98 ]
  step 4 qfunc [ 3.97 3.96 3.99 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
12: score 0.750 initialq 3.95 opt [[1 1 2 0 3 2]]
  step 0 qfunc [ 2.57 3.84 3.04 2.50 ]
  step 1 qfunc [ 2.77 3.89 3.14 2.91 ]
  step 2 qfunc [ 3.67 3.61 3.94 3.60 ]
  step 3 qfunc [ 3.97 3.94 3.95 3.90 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
13: score 0.833 initialq 3.95 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.82 2.90 2.54 ]
  step 1 qfunc [ 3.11 3.90 3.16 3.10 ]
  step 2 qfunc [ 3.23 3.38 3.05 3.94 ]
  step 3 qfunc [ 3.95 3.49 3.93 3.96 ]
  step 4 qfunc [ 3.97 3.21 3.97 3.97 ]
  step 5 qfunc [ 3.95 3.96 3.99 3.98 ]
14: score 0.924 initialq 3.95 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.78 3.72 2.93 2.74 ]
  step 1 qfunc [ 3.03 3.84 3.24 3.00 ]
  step 2 qfunc [ 3.32 3.32 3.92 3.50 ]
  step 3 qfunc [ 3.62 3.27 3.92 3.96 ]
  step 4 qfunc [ 3.30 3.23 3.98 3.56 ]
  step 5 qfunc [ 3.33 3.32 3.26 3.99 ]
15: score 0.839 initialq 3.95 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.57 3.87 2.97 2.64 ]
  step 1 qfunc [ 3.02 3.14 3.92 2.95 ]
  step 2 qfunc [ 3.38 3.93 3.37 3.94 ]
  step 3 qfunc [ 3.22 3.97 3.20 3.41 ]
  step 4 qfunc [ 3.95 3.97 3.00 3.98 ]
  step 5 qfunc [ 3.99 3.98 3.99 3.99 ]
16: score 0.690 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.68 3.87 2.82 2.71 ]
  step 1 qfunc [ 2.75 3.27 3.92 2.91 ]
  step 2 qfunc [ 3.31 3.94 3.29 3.46 ]
  step 3 qfunc [ 3.73 3.94 3.61 3.97 ]
  step 4 qfunc [ 3.95 3.98 3.95 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
17: score 0.747 initialq 3.95 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.65 3.89 3.02 2.88 ]
  step 1 qfunc [ 2.84 3.44 3.93 2.99 ]
  step 2 qfunc [ 3.36 3.94 3.37 3.94 ]
  step 3 qfunc [ 3.97 3.96 3.93 3.98 ]
  step 4 qfunc [ 3.98 3.99 3.98 3.99 ]
  step 5 qfunc [ 3.98 3.99 3.99 3.99 ]
18: score 0.750 initialq 3.95 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.67 3.85 2.93 2.76 ]
  step 1 qfunc [ 3.01 3.16 3.92 2.96 ]
  step 2 qfunc [ 3.21 3.94 3.17 3.06 ]
  step 3 qfunc [ 3.28 3.97 3.37 3.91 ]
  step 4 qfunc [ 3.62 3.97 3.07 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
19: score 0.638 initialq 3.95 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.73 3.86 2.99 2.70 ]
  step 1 qfunc [ 3.01 3.15 3.91 2.88 ]
  step 2 qfunc [ 3.28 3.93 3.36 3.37 ]
  step 3 qfunc [ 3.51 3.96 3.95 3.86 ]
  step 4 qfunc [ 3.89 3.97 3.79 3.98 ]
  step 5 qfunc [ 3.99 3.96 3.99 3.99 ]
20: score 0.750 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.70 3.87 2.89 2.68 ]
  step 1 qfunc [ 2.91 3.17 3.91 2.87 ]
  step 2 qfunc [ 3.36 3.94 3.35 3.47 ]
  step 3 qfunc [ 3.95 3.94 3.83 3.96 ]
  step 4 qfunc [ 3.97 3.94 3.94 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
21: score 0.500 initialq 3.95 opt [[1 2 3 1 1 3]]
  step 0 qfunc [ 2.83 3.85 3.07 2.70 ]
  step 1 qfunc [ 2.91 3.15 3.92 3.04 ]
  step 2 qfunc [ 3.27 3.40 3.14 3.95 ]
  step 3 qfunc [ 3.23 3.96 3.36 3.40 ]
  step 4 qfunc [ 3.94 3.98 3.16 3.96 ]
  step 5 qfunc [ 3.00 3.00 3.00 3.98 ]
22: score 0.750 initialq 3.95 opt [[1 1 2 0 3 2]]
  step 0 qfunc [ 2.78 3.82 2.97 2.57 ]
  step 1 qfunc [ 3.05 3.89 3.25 3.16 ]
  step 2 qfunc [ 3.48 3.59 3.95 3.68 ]
  step 3 qfunc [ 3.96 3.48 3.96 3.91 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
23: score 0.750 initialq 3.95 opt [[1 2 3 1 3 3]]
  step 0 qfunc [ 2.70 3.86 3.02 2.73 ]
  step 1 qfunc [ 3.08 3.30 3.93 3.01 ]
  step 2 qfunc [ 3.28 3.52 3.07 3.95 ]
  step 3 qfunc [ 3.16 3.95 3.25 3.95 ]
  step 4 qfunc [ 3.06 3.95 3.41 3.97 ]
  step 5 qfunc [ 3.93 3.94 3.97 3.99 ]
24: score 0.534 initialq 3.95 opt [[2 1 1 0 1 3]]
  step 0 qfunc [ 2.66 3.15 3.87 2.62 ]
  step 1 qfunc [ 2.72 3.91 2.87 2.89 ]
  step 2 qfunc [ 3.15 3.93 3.06 3.38 ]
  step 3 qfunc [ 3.95 3.93 3.27 3.95 ]
  step 4 qfunc [ 3.23 3.98 3.00 3.96 ]
  step 5 qfunc [ 3.10 3.02 3.01 3.98 ]
25: score 0.750 initialq 3.95 opt [[2 1 1 3 1 3]]
  step 0 qfunc [ 2.73 3.02 3.85 2.79 ]
  step 1 qfunc [ 2.84 3.91 2.90 2.64 ]
  step 2 qfunc [ 3.26 3.94 3.21 3.30 ]
  step 3 qfunc [ 3.49 3.40 3.51 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.97 3.97 ]
  step 5 qfunc [ 3.02 3.05 3.05 3.98 ]
26: score 1.000 initialq 3.95 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.70 3.78 3.01 2.60 ]
  step 1 qfunc [ 3.08 3.89 3.30 3.03 ]
  step 2 qfunc [ 3.34 3.39 3.94 3.44 ]
  step 3 qfunc [ 3.87 3.32 3.83 3.97 ]
  step 4 qfunc [ 3.22 3.14 3.98 3.89 ]
  step 5 qfunc [ 3.13 3.16 3.10 3.99 ]
27: score 0.750 initialq 3.95 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.62 3.89 2.93 2.81 ]
  step 1 qfunc [ 2.97 2.84 3.94 3.00 ]
  step 2 qfunc [ 3.23 3.15 3.19 3.95 ]
  step 3 qfunc [ 3.25 3.96 3.25 3.29 ]
  step 4 qfunc [ 3.97 3.96 3.70 3.98 ]
  step 5 qfunc [ 3.97 3.93 3.99 3.98 ]
28: score 0.599 initialq 3.95 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.84 3.84 2.86 2.89 ]
  step 1 qfunc [ 2.91 3.06 3.90 3.10 ]
  step 2 qfunc [ 3.36 3.93 3.29 3.31 ]
  step 3 qfunc [ 3.94 3.96 3.26 3.13 ]
  step 4 qfunc [ 3.97 3.97 3.83 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.98 3.96 ]
29: score 0.750 initialq 3.95 opt [[1 2 1 3 3 0]]
  step 0 qfunc [ 2.74 3.86 2.96 2.66 ]
  step 1 qfunc [ 2.86 3.24 3.91 3.04 ]
  step 2 qfunc [ 3.11 3.93 3.04 3.13 ]
  step 3 qfunc [ 3.30 3.95 3.45 3.96 ]
  step 4 qfunc [ 3.97 3.95 3.48 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.90 3.99 ]
30: score 0.510 initialq 3.95 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.73 3.87 2.95 2.68 ]
  step 1 qfunc [ 2.93 3.29 3.93 3.00 ]
  step 2 qfunc [ 3.20 3.57 3.17 3.95 ]
  step 3 qfunc [ 3.94 3.93 3.11 3.96 ]
  step 4 qfunc [ 2.36 3.96 2.51 2.35 ]
  step 5 qfunc [ 3.03 3.01 3.11 3.99 ]
31: score 0.500 initialq 3.95 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.67 3.83 2.96 2.72 ]
  step 1 qfunc [ 3.14 3.89 3.14 2.96 ]
  step 2 qfunc [ 3.39 3.58 3.42 3.94 ]
  step 3 qfunc [ 3.25 3.84 3.96 3.87 ]
  step 4 qfunc [ 3.91 3.98 3.74 3.97 ]
  step 5 qfunc [ 3.99 3.99 3.59 3.97 ]
32: score 0.526 initialq 3.95 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.70 3.84 2.90 2.76 ]
  step 1 qfunc [ 3.05 3.53 3.91 3.01 ]
  step 2 qfunc [ 3.28 3.76 3.20 3.94 ]
  step 3 qfunc [ 3.03 3.90 3.11 3.96 ]
  step 4 qfunc [ 2.63 3.96 2.74 2.59 ]
  step 5 qfunc [ 3.05 3.03 3.23 3.98 ]
33: score 0.750 initialq 3.95 opt [[2 1 1 3 3 3]]
  step 0 qfunc [ 2.32 2.96 3.88 2.53 ]
  step 1 qfunc [ 2.60 3.91 2.64 2.57 ]
  step 2 qfunc [ 3.11 3.93 3.12 3.34 ]
  step 3 qfunc [ 3.95 3.84 3.61 3.96 ]
  step 4 qfunc [ 3.96 3.96 3.90 3.98 ]
  step 5 qfunc [ 3.95 3.98 3.98 3.99 ]
34: score 0.964 initialq 3.95 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.81 2.73 2.72 ]
  step 1 qfunc [ 3.06 3.88 3.07 3.08 ]
  step 2 qfunc [ 3.44 3.40 3.72 3.93 ]
  step 3 qfunc [ 3.93 3.67 3.89 3.96 ]
  step 4 qfunc [ 3.97 3.93 3.97 3.97 ]
  step 5 qfunc [ 3.97 3.97 3.98 3.97 ]
35: score 0.750 initialq 3.95 opt [[1 2 1 3 1 3]]
  step 0 qfunc [ 2.70 3.82 3.01 2.71 ]
  step 1 qfunc [ 3.03 3.18 3.90 2.79 ]
  step 2 qfunc [ 3.13 3.93 3.10 3.22 ]
  step 3 qfunc [ 3.68 3.66 3.67 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.95 3.96 ]
  step 5 qfunc [ 3.15 3.08 3.08 3.99 ]
36: score 0.500 initialq 3.94 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.70 3.84 2.97 2.61 ]
  step 1 qfunc [ 3.04 3.21 3.91 2.96 ]
  step 2 qfunc [ 3.14 3.93 3.18 3.12 ]
  step 3 qfunc [ 3.30 3.96 3.19 3.86 ]
  step 4 qfunc [ 3.60 3.95 3.91 3.98 ]
  step 5 qfunc [ 3.98 3.99 3.97 3.97 ]
37: score 0.750 initialq 3.94 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.63 3.77 2.90 2.73 ]
  step 1 qfunc [ 2.89 2.87 2.99 3.92 ]
  step 2 qfunc [ 2.77 3.94 2.99 2.77 ]
  step 3 qfunc [ 3.25 3.09 2.99 3.96 ]
  step 4 qfunc [ 3.96 3.08 3.96 3.98 ]
  step 5 qfunc [ 2.99 2.96 3.98 2.99 ]
38: score 0.500 initialq 3.94 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.72 3.82 3.02 2.69 ]
  step 1 qfunc [ 2.94 3.18 3.87 3.08 ]
  step 2 qfunc [ 3.34 3.56 3.17 3.90 ]
  step 3 qfunc [ 3.20 3.90 3.90 3.95 ]
  step 4 qfunc [ 2.84 3.96 2.80 2.80 ]
  step 5 qfunc [ 3.02 3.00 3.00 3.99 ]
39: score 0.740 initialq 3.94 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.66 3.86 2.96 2.75 ]
  step 1 qfunc [ 2.88 3.20 3.90 3.02 ]
  step 2 qfunc [ 3.00 3.92 3.07 3.04 ]
  step 3 qfunc [ 3.82 3.94 3.70 3.95 ]
  step 4 qfunc [ 3.91 3.96 3.80 3.97 ]
  step 5 qfunc [ 3.98 3.98 3.97 3.97 ]
40: score 0.750 initialq 3.94 opt [[1 2 1 1 0 3]]
  step 0 qfunc [ 2.83 3.85 2.92 2.67 ]
  step 1 qfunc [ 2.97 3.19 3.90 2.88 ]
  step 2 qfunc [ 3.16 3.92 3.06 3.09 ]
  step 3 qfunc [ 3.63 3.95 3.33 3.95 ]
  step 4 qfunc [ 3.97 3.95 3.96 3.96 ]
  step 5 qfunc [ 3.05 3.00 3.09 3.98 ]
41: score 0.750 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.74 3.72 2.84 2.72 ]
  step 1 qfunc [ 2.88 3.88 2.98 2.92 ]
  step 2 qfunc [ 2.93 2.84 2.97 3.94 ]
  step 3 qfunc [ 3.44 3.29 2.32 3.95 ]
  step 4 qfunc [ 3.91 3.86 3.19 3.98 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
42: score 0.599 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.75 2.86 2.78 ]
  step 1 qfunc [ 2.88 3.89 2.99 2.90 ]
  step 2 qfunc [ 3.01 2.83 2.99 3.94 ]
  step 3 qfunc [ 3.58 3.61 2.70 3.96 ]
  step 4 qfunc [ 3.98 3.96 3.33 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
43: score 0.750 initialq 3.94 opt [[2 1 1 3 3 3]]
  step 0 qfunc [ 2.61 3.09 3.86 2.74 ]
  step 1 qfunc [ 2.80 3.91 2.73 2.88 ]
  step 2 qfunc [ 3.33 3.93 3.25 3.41 ]
  step 3 qfunc [ 3.46 3.95 3.83 3.96 ]
  step 4 qfunc [ 3.95 3.94 3.95 3.98 ]
  step 5 qfunc [ 3.98 3.96 3.99 3.99 ]
44: score 0.750 initialq 3.94 opt [[2 1 1 3 0 3]]
  step 0 qfunc [ 2.84 3.05 3.84 2.63 ]
  step 1 qfunc [ 2.76 3.90 2.85 2.82 ]
  step 2 qfunc [ 3.29 3.92 3.21 3.24 ]
  step 3 qfunc [ 3.92 3.89 3.33 3.96 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.96 ]
  step 5 qfunc [ 3.49 3.50 3.97 3.98 ]
45: score 0.591 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.76 3.78 2.88 2.73 ]
  step 1 qfunc [ 2.94 3.89 2.99 2.96 ]
  step 2 qfunc [ 3.05 3.04 2.99 3.93 ]
  step 3 qfunc [ 3.93 3.77 2.98 3.95 ]
  step 4 qfunc [ 3.97 3.97 3.90 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
46: score 0.911 initialq 3.94 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.80 2.97 2.75 ]
  step 1 qfunc [ 3.15 3.88 3.17 2.95 ]
  step 2 qfunc [ 3.28 3.52 3.73 3.94 ]
  step 3 qfunc [ 3.95 3.93 3.88 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.98 3.97 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.97 ]
47: score 0.997 initialq 3.94 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.56 3.81 2.74 2.62 ]
  step 1 qfunc [ 3.09 3.89 3.20 2.91 ]
  step 2 qfunc [ 3.29 3.30 3.43 3.94 ]
  step 3 qfunc [ 3.65 3.61 3.96 3.78 ]
  step 4 qfunc [ 3.92 3.96 3.98 3.95 ]
  step 5 qfunc [ 3.08 3.09 3.05 3.98 ]
48: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.68 3.81 2.90 2.73 ]
  step 1 qfunc [ 2.98 3.89 3.10 2.96 ]
  step 2 qfunc [ 3.08 3.29 3.07 3.93 ]
  step 3 qfunc [ 3.68 3.96 3.84 3.89 ]
  step 4 qfunc [ 3.95 3.53 3.97 3.97 ]
  step 5 qfunc [ 3.97 3.91 3.98 3.99 ]
49: score 0.750 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.78 3.78 2.87 2.71 ]
  step 1 qfunc [ 2.94 3.88 3.06 2.86 ]
  step 2 qfunc [ 3.02 3.33 3.09 3.94 ]
  step 3 qfunc [ 3.33 3.63 3.51 3.95 ]
  step 4 qfunc [ 3.94 3.86 3.96 3.98 ]
  step 5 qfunc [ 2.99 2.98 3.98 2.99 ]
50: score 1.000 initialq 3.94 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.43 3.82 2.93 2.72 ]
  step 1 qfunc [ 2.96 3.89 3.25 3.09 ]
  step 2 qfunc [ 3.07 3.43 3.30 3.93 ]
  step 3 qfunc [ 3.95 3.93 3.95 3.93 ]
  step 4 qfunc [ 3.95 3.96 3.97 3.96 ]
  step 5 qfunc [ 3.98 3.40 3.96 3.99 ]
51: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.59 3.81 2.77 2.63 ]
  step 1 qfunc [ 2.85 3.89 2.91 2.80 ]
  step 2 qfunc [ 3.13 3.22 3.08 3.93 ]
  step 3 qfunc [ 3.72 3.95 3.14 3.94 ]
  step 4 qfunc [ 2.81 3.00 3.97 3.97 ]
  step 5 qfunc [ 3.07 3.34 3.04 3.98 ]
52: score 0.750 initialq 3.94 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.77 3.85 2.80 2.65 ]
  step 1 qfunc [ 3.02 3.23 3.90 2.88 ]
  step 2 qfunc [ 3.16 3.93 3.19 3.31 ]
  step 3 qfunc [ 3.44 3.94 3.42 3.95 ]
  step 4 qfunc [ 3.96 3.94 3.96 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.98 3.89 ]
53: score 0.500 initialq 3.94 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.77 3.86 2.92 2.68 ]
  step 1 qfunc [ 2.92 3.35 3.91 2.90 ]
  step 2 qfunc [ 3.32 3.57 3.27 3.93 ]
  step 3 qfunc [ 3.33 3.93 3.26 3.95 ]
  step 4 qfunc [ 2.90 3.96 2.79 2.78 ]
  step 5 qfunc [ 3.15 3.09 3.04 3.98 ]
54: score 0.500 initialq 3.94 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.60 3.83 2.94 2.74 ]
  step 1 qfunc [ 2.94 3.25 3.89 3.07 ]
  step 2 qfunc [ 3.31 3.91 3.37 3.27 ]
  step 3 qfunc [ 3.92 3.91 3.82 3.95 ]
  step 4 qfunc [ 3.97 3.81 3.91 3.97 ]
  step 5 qfunc [ 3.08 3.57 3.98 3.43 ]
55: score 0.742 initialq 3.94 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.73 3.84 3.07 2.62 ]
  step 1 qfunc [ 3.11 3.45 3.89 2.92 ]
  step 2 qfunc [ 3.22 3.91 3.29 3.45 ]
  step 3 qfunc [ 3.93 3.92 3.94 3.94 ]
  step 4 qfunc [ 3.95 3.95 3.94 3.96 ]
  step 5 qfunc [ 3.94 3.97 3.95 3.98 ]
56: score 0.714 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.73 3.82 2.79 2.70 ]
  step 1 qfunc [ 2.95 3.90 2.92 3.02 ]
  step 2 qfunc [ 3.25 3.21 3.18 3.94 ]
  step 3 qfunc [ 3.77 3.57 3.41 3.96 ]
  step 4 qfunc [ 3.98 3.95 3.96 3.97 ]
  step 5 qfunc [ 2.99 2.96 3.99 2.99 ]
57: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.78 3.80 2.94 2.75 ]
  step 1 qfunc [ 3.08 3.88 3.07 3.01 ]
  step 2 qfunc [ 3.43 3.42 3.41 3.93 ]
  step 3 qfunc [ 3.92 3.95 3.86 3.94 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.96 ]
  step 5 qfunc [ 3.96 3.95 3.97 3.98 ]
58: score 0.750 initialq 3.94 opt [[1 1 3 3 1 2]]
  step 0 qfunc [ 2.67 3.79 2.84 2.72 ]
  step 1 qfunc [ 2.92 3.88 2.99 2.89 ]
  step 2 qfunc [ 3.06 2.99 2.99 3.93 ]
  step 3 qfunc [ 3.74 3.67 2.96 3.95 ]
  step 4 qfunc [ 3.94 3.98 3.90 3.90 ]
  step 5 qfunc [ 2.95 2.92 3.98 2.99 ]
59: score 0.615 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.68 3.71 2.87 2.71 ]
  step 1 qfunc [ 2.90 3.86 2.98 2.92 ]
  step 2 qfunc [ 2.98 3.00 2.99 3.93 ]
  step 3 qfunc [ 3.71 3.45 2.77 3.95 ]
  step 4 qfunc [ 3.97 3.96 3.94 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
60: score 0.750 initialq 3.94 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.77 3.78 2.84 2.67 ]
  step 1 qfunc [ 2.83 3.88 2.99 2.93 ]
  step 2 qfunc [ 3.14 3.08 2.99 3.93 ]
  step 3 qfunc [ 3.95 3.51 2.89 3.73 ]
  step 4 qfunc [ 3.96 3.89 3.95 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
61: score 0.682 initialq 3.94 opt [[1 3 1 3 2 3]]
  step 0 qfunc [ 2.67 3.76 2.87 2.72 ]
  step 1 qfunc [ 2.97 3.07 3.09 3.90 ]
  step 2 qfunc [ 2.77 3.92 3.02 2.76 ]
  step 3 qfunc [ 3.37 3.41 3.52 3.95 ]
  step 4 qfunc [ 3.82 3.87 3.97 3.93 ]
  step 5 qfunc [ 3.72 3.65 3.54 3.98 ]
62: score 0.987 initialq 3.94 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.70 3.83 3.00 2.58 ]
  step 1 qfunc [ 2.89 3.11 3.90 2.95 ]
  step 2 qfunc [ 3.16 3.92 3.04 3.03 ]
  step 3 qfunc [ 3.35 3.86 3.28 3.95 ]
  step 4 qfunc [ 2.76 3.90 3.93 3.96 ]
  step 5 qfunc [ 3.22 3.96 3.98 3.74 ]
63: score 0.500 initialq 3.94 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.57 3.80 2.74 2.45 ]
  step 1 qfunc [ 2.94 3.88 2.79 2.86 ]
  step 2 qfunc [ 3.41 3.23 3.93 3.75 ]
  step 3 qfunc [ 3.61 3.75 3.27 3.95 ]
  step 4 qfunc [ 3.93 3.96 3.97 3.78 ]
  step 5 qfunc [ 2.92 2.69 3.03 3.98 ]
64: score 0.500 initialq 3.94 opt [[1 3 1 1 3 2]]
  step 0 qfunc [ 2.74 3.78 2.88 2.70 ]
  step 1 qfunc [ 2.87 2.97 2.99 3.91 ]
  step 2 qfunc [ 2.83 3.93 2.99 2.71 ]
  step 3 qfunc [ 3.28 3.97 2.99 3.94 ]
  step 4 qfunc [ 2.76 2.70 2.99 3.98 ]
  step 5 qfunc [ 2.99 2.99 3.99 2.99 ]
65: score 0.542 initialq 3.93 opt [[2 1 3 1 3 2]]
  step 0 qfunc [ 2.73 3.11 3.84 2.76 ]
  step 1 qfunc [ 2.82 3.90 2.85 2.91 ]
  step 2 qfunc [ 3.48 3.51 3.28 3.93 ]
  step 3 qfunc [ 3.40 3.94 3.29 2.94 ]
  step 4 qfunc [ 3.78 3.96 3.86 3.96 ]
  step 5 qfunc [ 3.48 3.91 3.98 3.81 ]
66: score 0.750 initialq 3.93 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.77 3.78 2.88 2.67 ]
  step 1 qfunc [ 3.04 3.87 3.18 2.93 ]
  step 2 qfunc [ 3.17 3.16 3.77 3.93 ]
  step 3 qfunc [ 3.95 3.43 3.87 3.95 ]
  step 4 qfunc [ 3.96 3.62 3.97 3.96 ]
  step 5 qfunc [ 3.97 3.93 3.98 3.99 ]
67: score 0.750 initialq 3.93 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.63 3.78 2.93 2.78 ]
  step 1 qfunc [ 2.87 3.86 3.06 2.91 ]
  step 2 qfunc [ 3.30 3.34 3.38 3.92 ]
  step 3 qfunc [ 3.61 3.84 3.90 3.95 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.93 ]
  step 5 qfunc [ 3.97 3.95 3.92 3.98 ]
68: score 0.503 initialq 3.93 opt [[1 1 3 0 0 2]]
  step 0 qfunc [ 2.69 3.76 2.85 2.67 ]
  step 1 qfunc [ 2.94 3.87 2.99 2.83 ]
  step 2 qfunc [ 2.84 3.22 2.99 3.93 ]
  step 3 qfunc [ 3.95 3.51 3.46 3.55 ]
  step 4 qfunc [ 3.97 3.94 3.80 3.95 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
69: score 0.500 initialq 3.93 opt [[1 1 3 0 1 2]]
  step 0 qfunc [ 2.72 3.79 2.82 2.79 ]
  step 1 qfunc [ 2.84 3.88 2.92 2.95 ]
  step 2 qfunc [ 3.28 3.13 2.75 3.92 ]
  step 3 qfunc [ 3.94 3.86 3.47 3.94 ]
  step 4 qfunc [ 3.95 3.96 3.94 3.96 ]
  step 5 qfunc [ 2.66 2.43 3.98 2.99 ]
70: score 0.526 initialq 3.93 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.78 3.85 2.90 2.76 ]
  step 1 qfunc [ 2.80 2.79 3.90 2.83 ]
  step 2 qfunc [ 3.18 3.92 3.10 3.06 ]
  step 3 qfunc [ 3.30 3.95 3.34 3.77 ]
  step 4 qfunc [ 3.32 3.95 3.06 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.97 3.97 ]
71: score 0.750 initialq 3.93 opt [[1 1 3 1 2 1]]
  step 0 qfunc [ 2.73 3.79 2.84 2.54 ]
  step 1 qfunc [ 3.00 3.86 3.14 2.94 ]
  step 2 qfunc [ 3.07 3.34 3.17 3.91 ]
  step 3 qfunc [ 3.89 3.94 3.89 3.93 ]
  step 4 qfunc [ 2.95 3.58 3.96 3.80 ]
  step 5 qfunc [ 3.17 3.98 3.82 3.94 ]
72: score 0.500 initialq 3.93 opt [[1 1 3 1 0 2]]
  step 0 qfunc [ 2.60 3.75 2.76 2.62 ]
  step 1 qfunc [ 3.08 3.84 2.96 2.76 ]
  step 2 qfunc [ 3.61 3.38 3.45 3.90 ]
  step 3 qfunc [ 3.89 3.93 3.77 3.77 ]
  step 4 qfunc [ 3.95 3.75 3.91 3.32 ]
  step 5 qfunc [ 2.99 3.00 3.96 3.00 ]
73: score 0.742 initialq 3.93 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.74 3.76 3.01 2.59 ]
  step 1 qfunc [ 3.01 3.86 3.10 2.95 ]
  step 2 qfunc [ 3.30 3.23 3.19 3.92 ]
  step 3 qfunc [ 3.95 3.79 3.68 3.91 ]
  step 4 qfunc [ 3.96 3.93 3.96 3.96 ]
  step 5 qfunc [ 2.98 2.99 3.97 2.99 ]
74: score 0.703 initialq 3.93 opt [[1 3 2 3 1 3]]
  step 0 qfunc [ 2.71 3.83 2.85 2.69 ]
  step 1 qfunc [ 2.99 2.88 3.01 3.88 ]
  step 2 qfunc [ 2.81 3.27 3.90 2.85 ]
  step 3 qfunc [ 3.39 3.90 3.23 3.94 ]
  step 4 qfunc [ 2.99 3.95 3.00 2.99 ]
  step 5 qfunc [ 2.99 3.00 3.00 3.97 ]
75: score 0.602 initialq 3.93 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.70 3.80 2.98 2.72 ]
  step 1 qfunc [ 3.04 3.87 3.16 2.89 ]
  step 2 qfunc [ 3.54 3.45 3.45 3.92 ]
  step 3 qfunc [ 3.93 3.89 3.94 3.93 ]
  step 4 qfunc [ 3.94 3.95 3.97 3.74 ]
  step 5 qfunc [ 3.28 3.43 3.04 3.99 ]
76: score 0.586 initialq 3.93 opt [[1 3 1 3 0 2]]
  step 0 qfunc [ 2.71 3.77 2.84 2.73 ]
  step 1 qfunc [ 2.89 2.95 2.98 3.91 ]
  step 2 qfunc [ 2.82 3.93 2.99 2.85 ]
  step 3 qfunc [ 3.26 3.16 2.95 3.96 ]
  step 4 qfunc [ 3.97 3.61 3.91 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
77: score 0.750 initialq 3.93 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.69 3.75 2.70 2.69 ]
  step 1 qfunc [ 2.87 3.88 2.96 2.85 ]
  step 2 qfunc [ 2.95 2.91 2.99 3.93 ]
  step 3 qfunc [ 3.19 3.13 3.35 3.94 ]
  step 4 qfunc [ 3.63 2.90 3.93 3.96 ]
  step 5 qfunc [ 2.99 2.98 3.97 2.99 ]
78: score 0.500 initialq 3.93 opt [[1 1 2 1 2 3]]
  step 0 qfunc [ 2.71 3.80 2.91 2.67 ]
  step 1 qfunc [ 2.92 3.87 3.17 2.77 ]
  step 2 qfunc [ 3.44 3.33 3.92 3.48 ]
  step 3 qfunc [ 3.87 3.94 3.55 3.78 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.90 ]
  step 5 qfunc [ 3.06 3.04 3.14 3.98 ]
79: score 0.750 initialq 3.93 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.80 3.83 2.95 2.73 ]
  step 1 qfunc [ 2.99 3.14 3.90 2.98 ]
  step 2 qfunc [ 3.17 3.92 3.24 2.98 ]
  step 3 qfunc [ 3.14 3.94 3.07 3.93 ]
  step 4 qfunc [ 2.85 3.77 2.90 3.97 ]
  step 5 qfunc [ 3.92 3.96 3.98 3.99 ]
80: score 0.612 initialq 3.93 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.76 2.87 2.75 ]
  step 1 qfunc [ 2.86 3.88 2.99 2.86 ]
  step 2 qfunc [ 2.96 2.98 2.99 3.94 ]
  step 3 qfunc [ 3.53 3.43 2.79 3.96 ]
  step 4 qfunc [ 3.97 3.96 3.02 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
81: score 0.500 initialq 3.93 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.68 3.79 2.91 2.73 ]
  step 1 qfunc [ 2.98 3.86 3.34 2.87 ]
  step 2 qfunc [ 3.54 3.40 3.69 3.91 ]
  step 3 qfunc [ 3.94 3.91 3.90 3.88 ]
  step 4 qfunc [ 3.57 3.95 3.95 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
82: score 0.753 initialq 3.92 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.74 3.74 2.88 2.74 ]
  step 1 qfunc [ 2.87 2.97 2.99 3.90 ]
  step 2 qfunc [ 2.78 3.93 2.99 2.73 ]
  step 3 qfunc [ 3.21 3.31 2.99 3.95 ]
  step 4 qfunc [ 3.94 3.14 3.96 3.96 ]
  step 5 qfunc [ 2.99 2.96 3.97 2.99 ]
83: score 0.750 initialq 3.92 opt [[1 2 1 3 2 2]]
  step 0 qfunc [ 2.58 3.85 2.91 2.73 ]
  step 1 qfunc [ 3.07 3.32 3.90 2.95 ]
  step 2 qfunc [ 3.33 3.93 3.02 3.48 ]
  step 3 qfunc [ 3.66 3.69 3.44 3.95 ]
  step 4 qfunc [ 3.83 3.96 3.97 3.87 ]
  step 5 qfunc [ 3.95 3.97 3.98 3.96 ]
84: score 0.773 initialq 3.92 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.74 3.76 2.64 2.76 ]
  step 1 qfunc [ 3.00 3.84 3.14 3.08 ]
  step 2 qfunc [ 3.26 3.20 3.57 3.90 ]
  step 3 qfunc [ 3.90 3.55 3.51 3.91 ]
  step 4 qfunc [ 3.91 3.91 3.94 3.88 ]
  step 5 qfunc [ 3.91 3.58 3.97 3.92 ]
85: score 0.714 initialq 3.92 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.63 3.81 2.96 2.69 ]
  step 1 qfunc [ 2.95 3.27 3.88 3.01 ]
  step 2 qfunc [ 3.07 3.55 3.09 3.91 ]
  step 3 qfunc [ 3.28 3.92 3.12 3.32 ]
  step 4 qfunc [ 3.80 3.68 3.63 3.96 ]
  step 5 qfunc [ 3.91 3.66 3.97 3.65 ]
86: score 0.719 initialq 3.92 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.76 3.60 2.91 2.76 ]
  step 1 qfunc [ 2.90 3.87 2.98 2.89 ]
  step 2 qfunc [ 2.93 2.91 2.87 3.94 ]
  step 3 qfunc [ 3.24 3.25 2.48 3.96 ]
  step 4 qfunc [ 3.86 3.10 3.14 3.97 ]
  step 5 qfunc [ 2.98 2.96 3.98 2.99 ]
87: score 0.750 initialq 3.92 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.70 3.78 2.85 2.66 ]
  step 1 qfunc [ 2.87 2.87 2.98 3.91 ]
  step 2 qfunc [ 2.82 3.93 2.98 2.76 ]
  step 3 qfunc [ 3.24 3.05 2.96 3.95 ]
  step 4 qfunc [ 3.86 3.62 3.90 3.97 ]
  step 5 qfunc [ 2.99 2.98 3.98 2.99 ]
88: score 0.766 initialq 3.92 opt [[1 2 0 1 3 3]]
  step 0 qfunc [ 2.70 3.76 2.90 2.73 ]
  step 1 qfunc [ 3.04 3.14 3.86 2.99 ]
  step 2 qfunc [ 3.90 3.29 3.21 3.01 ]
  step 3 qfunc [ 3.01 3.92 3.07 3.06 ]
  step 4 qfunc [ 3.88 3.93 3.91 3.95 ]
  step 5 qfunc [ 3.24 3.31 3.13 3.96 ]
89: score 0.747 initialq 3.92 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.75 3.70 2.88 2.69 ]
  step 1 qfunc [ 2.90 3.87 2.99 2.89 ]
  step 2 qfunc [ 3.02 3.00 2.92 3.93 ]
  step 3 qfunc [ 3.95 3.20 2.51 3.35 ]
  step 4 qfunc [ 3.26 3.16 2.95 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.96 2.99 ]
90: score 0.596 initialq 3.92 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.69 3.79 2.85 2.62 ]
  step 1 qfunc [ 2.85 3.16 3.88 3.04 ]
  step 2 qfunc [ 3.17 3.30 3.11 3.90 ]
  step 3 qfunc [ 3.21 3.86 3.27 3.93 ]
  step 4 qfunc [ 2.96 3.94 2.98 2.95 ]
  step 5 qfunc [ 2.99 3.01 3.00 3.96 ]
91: score 0.750 initialq 3.92 opt [[2 1 1 1 2 3]]
  step 0 qfunc [ 2.69 3.04 3.80 2.58 ]
  step 1 qfunc [ 2.66 3.86 2.86 2.76 ]
  step 2 qfunc [ 3.18 3.88 3.20 3.23 ]
  step 3 qfunc [ 3.40 3.92 3.91 3.38 ]
  step 4 qfunc [ 3.95 3.93 3.95 3.72 ]
  step 5 qfunc [ 3.02 2.99 3.05 3.97 ]
92: score 0.500 initialq 3.92 opt [[1 2 1 1 0 1]]
  step 0 qfunc [ 2.75 3.82 2.83 2.87 ]
  step 1 qfunc [ 2.82 3.18 3.88 3.03 ]
  step 2 qfunc [ 2.99 3.90 3.14 3.12 ]
  step 3 qfunc [ 3.12 3.93 3.37 3.83 ]
  step 4 qfunc [ 3.96 3.94 3.93 3.91 ]
  step 5 qfunc [ 3.96 3.97 3.95 3.42 ]
93: score 0.750 initialq 3.91 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.80 3.80 2.85 2.80 ]
  step 1 qfunc [ 2.94 3.86 3.01 2.93 ]
  step 2 qfunc [ 3.58 3.58 3.60 3.93 ]
  step 3 qfunc [ 3.93 3.94 3.93 3.95 ]
  step 4 qfunc [ 3.96 3.95 3.98 3.90 ]
  step 5 qfunc [ 3.98 3.98 3.96 3.99 ]
94: score 0.669 initialq 3.91 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.67 3.76 2.79 2.60 ]
  step 1 qfunc [ 2.76 3.84 2.97 2.91 ]
  step 2 qfunc [ 3.27 3.01 2.94 3.94 ]
  step 3 qfunc [ 3.95 3.80 3.76 3.96 ]
  step 4 qfunc [ 3.97 3.95 3.97 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.99 2.99 ]
95: score 0.750 initialq 3.91 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.82 3.81 2.92 2.62 ]
  step 1 qfunc [ 3.05 3.34 3.88 2.89 ]
  step 2 qfunc [ 3.25 3.48 3.30 3.90 ]
  step 3 qfunc [ 3.16 3.91 3.35 3.13 ]
  step 4 qfunc [ 3.76 3.63 3.93 3.95 ]
  step 5 qfunc [ 3.77 3.95 3.97 3.95 ]
96: score 0.727 initialq 3.90 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.66 3.76 2.80 2.72 ]
  step 1 qfunc [ 2.85 3.85 2.98 2.96 ]
  step 2 qfunc [ 3.10 3.12 2.95 3.93 ]
  step 3 qfunc [ 3.93 3.56 3.37 3.95 ]
  step 4 qfunc [ 3.97 3.90 3.94 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
97: score 0.750 initialq 3.90 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.74 2.86 2.70 ]
  step 1 qfunc [ 2.89 3.86 2.98 2.85 ]
  step 2 qfunc [ 3.05 3.00 2.87 3.94 ]
  step 3 qfunc [ 3.47 3.20 2.52 3.96 ]
  step 4 qfunc [ 3.97 3.42 3.89 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
98: score 0.706 initialq 3.84 opt [[1 3 1 1 3 2]]
  step 0 qfunc [ 2.74 3.55 2.91 2.80 ]
  step 1 qfunc [ 2.86 2.81 2.95 3.83 ]
  step 2 qfunc [ 2.75 3.87 2.97 2.84 ]
  step 3 qfunc [ 2.91 3.89 2.91 3.40 ]
  step 4 qfunc [ 2.74 2.71 2.99 3.91 ]
  step 5 qfunc [ 2.99 2.98 3.92 2.99 ]
99: score 0.539 initialq 3.83 opt [[1 2 1 1 1 3]]
  step 0 qfunc [ 2.70 3.72 2.91 2.73 ]
  step 1 qfunc [ 2.88 3.07 3.79 2.95 ]
  step 2 qfunc [ 3.05 3.82 3.04 3.07 ]
  step 3 qfunc [ 3.63 3.95 3.42 3.60 ]
  step 4 qfunc [ 3.97 3.97 3.95 3.95 ]
  step 5 qfunc [ 3.04 3.01 3.07 3.98 ]

In [36]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
good2 = [5,13,34,46,62,84] # last steps end up being 2
good3 = [3,7,14,26,47,50,63,75] # last steps end up being 3
final2 = [4,11,16,17,20,29,33,39,43,52,55,66,67,93] # last step should've been 2
final3 = [6] # last step should've been 3

# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good2, final2])
six.print_(model_ixs)
rmat = np.zeros((model_ixs.shape[0],model_ixs.shape[0]))
# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel
for pix in six.moves.range(model_ixs.shape[0]):
    policy = sorted_opts[model_ixs[pix],0,:]
    last_act = policy[-1]
    #six.print_(last_act)
    for eix in six.moves.range(model_ixs.shape[0]):
        # qfunc of last step
        last_q = sorted_qfuncs[model_ixs[eix],-1,:]
        #six.print_(last_q)
        rmat[pix,eix] = last_q[last_act]
six.print_(rmat)
six.print_(np.mean(rmat,axis=1))
six.print_(np.min(rmat,axis=1))


[ 5 13 34 46 62 84  4 11 16 17 20 29 33 39 43 52 55 66 67 93]
[[ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98837483  3.98672328  3.98188117  3.98886294  3.97772018  3.9653923
   3.98727488  3.98806248  3.98553694  3.98821222  3.98721899  3.90402416
   3.97742461  3.97498535  3.98551816  3.97761134  3.95061849  3.97987959
   3.92001373  3.95773623]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98807736  3.96020339  3.97351647  3.9858234   3.96360565  3.57574994
   3.98655573  3.98551312  3.9870714   3.98865285  3.97948989  3.97111562
   3.9821775   3.98146364  3.96074294  3.98066896  3.96733496  3.93318591
   3.94903406  3.98337299]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98769802  3.95175663  3.9707029   3.98652552  3.22290444  3.91385941
   3.98387826  3.98754257  3.98730893  3.98493903  3.98138427  3.98809341
   3.9505388   3.97801494  3.97861271  3.97388177  3.94275548  3.96784131
   3.97334717  3.98344242]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98807736  3.96020339  3.97351647  3.9858234   3.96360565  3.57574994
   3.98655573  3.98551312  3.9870714   3.98865285  3.97948989  3.97111562
   3.9821775   3.98146364  3.96074294  3.98066896  3.96733496  3.93318591
   3.94903406  3.98337299]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98807736  3.96020339  3.97351647  3.9858234   3.96360565  3.57574994
   3.98655573  3.98551312  3.9870714   3.98865285  3.97948989  3.97111562
   3.9821775   3.98146364  3.96074294  3.98066896  3.96733496  3.93318591
   3.94903406  3.98337299]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]
 [ 3.98662905  3.98469254  3.96945573  3.9726783   3.74197243  3.91695007
   3.98837875  3.98854498  3.98850741  3.9883933   3.98739979  3.98526399
   3.98550706  3.97166833  3.98665456  3.88637105  3.97577658  3.98713791
   3.9835632   3.98705145]]
[ 3.97265359  3.97265359  3.97265359  3.97265359  3.97265359  3.97265359
  3.96312982  3.96312982  3.96312982  3.95416779  3.96312982  3.9347514
  3.96312982  3.95416779  3.96312982  3.95416779  3.96312982  3.96312982
  3.96312982  3.96312982]
[ 3.90402416  3.90402416  3.90402416  3.90402416  3.90402416  3.90402416
  3.74197243  3.74197243  3.74197243  3.57574994  3.74197243  3.22290444
  3.74197243  3.57574994  3.74197243  3.57574994  3.74197243  3.74197243
  3.74197243  3.74197243]

In [37]:
'''
Checking the policies and q-values of the learned models for dropout=0.8 and epoch23
'''
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')
data13 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runD.npz')

data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')
data23 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real1-runD.npz')

data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')
data33 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real0-runD.npz')

data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runC.npz')
data43 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runD.npz')

data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')
data53 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runD.npz')

data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')
data63 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runD.npz')

vloss = np.concatenate([data11['vloss'],data12['vloss'],data13['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0],data23['scores'][:,0]])
#trueqvals = np.concatenate([data21['qvals'][:,0],data22['qvals'][:,0],data23['qvals'][:,0]])
#falseqvals = np.concatenate([data31['qvals'][:,0],data32['qvals'][:,0],data33['qvals'][:,0]])
rewards = np.concatenate([data41['rewards'][:,0],data42['rewards'][:,0],data43['rewards'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0],data53['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts'],data63['opts']])
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:],data63['qs'][:,0,:,:]])

sorted_score_ix = np.flip(np.argsort(initialq), 0)
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix,:]
sorted_qfuncs = qfuncs[sorted_score_ix,:,:]

for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:]))
    for t in six.moves.range(6):
        six.print_('  step {} qfunc [ {} ]'.format(t, ' '.join(['{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:]])))
    pass


 0: score 0.750 initialq 3.43 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.16 3.27 2.27 2.16 ]
  step 1 qfunc [ 2.36 3.37 2.37 2.37 ]
  step 2 qfunc [ 2.43 2.71 2.41 3.49 ]
  step 3 qfunc [ 3.01 2.89 2.89 3.51 ]
  step 4 qfunc [ 3.36 3.40 3.52 2.62 ]
  step 5 qfunc [ 3.54 3.46 3.53 3.13 ]
 1: score 0.750 initialq 3.40 opt [[2 1 1 3 3 0]]
  step 0 qfunc [ 2.26 2.61 3.31 2.29 ]
  step 1 qfunc [ 2.39 3.37 2.33 2.33 ]
  step 2 qfunc [ 2.58 3.40 2.58 2.65 ]
  step 3 qfunc [ 3.12 3.07 2.77 3.44 ]
  step 4 qfunc [ 3.44 3.41 2.76 3.45 ]
  step 5 qfunc [ 3.46 3.44 3.27 3.45 ]
 2: score 0.750 initialq 3.39 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.23 2.49 3.27 2.21 ]
  step 1 qfunc [ 2.35 3.34 2.29 2.41 ]
  step 2 qfunc [ 2.66 3.37 2.54 2.68 ]
  step 3 qfunc [ 3.05 3.42 2.59 3.25 ]
  step 4 qfunc [ 3.06 3.15 2.61 3.45 ]
  step 5 qfunc [ 3.45 3.19 3.46 3.27 ]
 3: score 0.753 initialq 3.37 opt [[1 1 3 2 3 0]]
  step 0 qfunc [ 2.28 3.20 2.38 2.23 ]
  step 1 qfunc [ 2.41 3.30 2.56 2.39 ]
  step 2 qfunc [ 2.41 2.58 2.64 3.38 ]
  step 3 qfunc [ 2.90 2.79 3.40 3.12 ]
  step 4 qfunc [ 2.97 3.31 2.47 3.42 ]
  step 5 qfunc [ 3.44 3.22 3.43 3.02 ]
 4: score 1.000 initialq 3.36 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.28 3.17 2.40 2.25 ]
  step 1 qfunc [ 2.43 3.27 2.60 2.43 ]
  step 2 qfunc [ 2.45 2.62 2.68 3.41 ]
  step 3 qfunc [ 2.89 2.85 3.43 2.85 ]
  step 4 qfunc [ 3.30 3.20 3.46 3.36 ]
  step 5 qfunc [ 3.00 3.32 2.86 3.49 ]
 5: score 1.000 initialq 3.33 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.25 3.18 2.47 2.33 ]
  step 1 qfunc [ 2.41 2.58 3.27 2.45 ]
  step 2 qfunc [ 2.58 3.31 2.58 2.60 ]
  step 3 qfunc [ 2.79 2.84 2.82 3.39 ]
  step 4 qfunc [ 3.21 2.93 3.34 3.41 ]
  step 5 qfunc [ 2.60 2.51 3.46 2.56 ]
 6: score 1.000 initialq 3.33 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.26 3.11 2.37 2.24 ]
  step 1 qfunc [ 2.38 3.26 2.55 2.44 ]
  step 2 qfunc [ 2.39 2.49 3.35 2.78 ]
  step 3 qfunc [ 2.50 2.63 2.54 3.38 ]
  step 4 qfunc [ 2.73 2.78 2.55 3.39 ]
  step 5 qfunc [ 2.84 2.59 3.40 2.61 ]
 7: score 0.750 initialq 3.32 opt [[1 1 3 2 3 0]]
  step 0 qfunc [ 2.28 3.14 2.36 2.17 ]
  step 1 qfunc [ 2.45 3.24 2.48 2.43 ]
  step 2 qfunc [ 2.50 2.84 2.56 3.29 ]
  step 3 qfunc [ 2.96 3.06 3.32 2.94 ]
  step 4 qfunc [ 3.22 3.28 2.90 3.34 ]
  step 5 qfunc [ 3.37 3.14 3.28 2.73 ]
 8: score 1.000 initialq 3.32 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.18 3.15 2.32 2.23 ]
  step 1 qfunc [ 2.33 3.24 2.47 2.37 ]
  step 2 qfunc [ 2.29 2.50 2.51 3.41 ]
  step 3 qfunc [ 2.95 2.59 3.44 2.93 ]
  step 4 qfunc [ 3.35 2.95 3.20 3.46 ]
  step 5 qfunc [ 3.03 2.76 3.47 2.62 ]
 9: score 1.000 initialq 3.31 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.22 3.12 2.32 2.18 ]
  step 1 qfunc [ 2.34 3.23 2.44 2.30 ]
  step 2 qfunc [ 2.44 2.47 2.45 3.37 ]
  step 3 qfunc [ 2.92 2.76 3.27 3.41 ]
  step 4 qfunc [ 3.02 2.73 3.43 2.65 ]
  step 5 qfunc [ 3.38 3.12 3.45 2.74 ]
10: score 1.000 initialq 3.30 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.19 3.10 2.34 2.21 ]
  step 1 qfunc [ 2.37 3.22 2.49 2.36 ]
  step 2 qfunc [ 2.33 2.49 2.54 3.35 ]
  step 3 qfunc [ 2.64 2.53 3.37 2.86 ]
  step 4 qfunc [ 2.94 2.90 2.69 3.38 ]
  step 5 qfunc [ 3.22 2.59 3.39 2.72 ]
11: score 1.000 initialq 3.29 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.29 3.09 2.39 2.26 ]
  step 1 qfunc [ 2.42 3.22 2.53 2.42 ]
  step 2 qfunc [ 2.30 2.46 2.53 3.35 ]
  step 3 qfunc [ 2.71 2.55 3.38 2.90 ]
  step 4 qfunc [ 3.18 3.18 2.82 3.40 ]
  step 5 qfunc [ 3.39 2.69 3.43 2.64 ]
12: score 0.966 initialq 3.29 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.20 3.11 2.36 2.23 ]
  step 1 qfunc [ 2.34 3.22 2.57 2.39 ]
  step 2 qfunc [ 2.47 2.62 2.73 3.29 ]
  step 3 qfunc [ 2.68 2.60 3.31 2.68 ]
  step 4 qfunc [ 3.22 3.05 2.83 3.33 ]
  step 5 qfunc [ 3.34 2.96 3.34 2.74 ]
13: score 1.000 initialq 3.28 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.22 3.11 2.25 2.17 ]
  step 1 qfunc [ 2.38 3.22 2.56 2.37 ]
  step 2 qfunc [ 2.32 2.55 2.54 3.38 ]
  step 3 qfunc [ 2.68 2.70 3.40 2.92 ]
  step 4 qfunc [ 3.04 2.96 2.76 3.41 ]
  step 5 qfunc [ 3.28 3.05 3.48 2.85 ]
14: score 1.000 initialq 3.27 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.10 2.40 2.26 ]
  step 1 qfunc [ 2.42 3.19 2.56 2.38 ]
  step 2 qfunc [ 2.21 2.60 2.53 3.28 ]
  step 3 qfunc [ 3.08 2.81 3.31 3.10 ]
  step 4 qfunc [ 3.22 3.07 3.30 3.34 ]
  step 5 qfunc [ 3.21 3.08 3.37 2.73 ]
15: score 1.000 initialq 3.26 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.31 3.11 2.44 2.27 ]
  step 1 qfunc [ 2.44 2.54 3.21 2.46 ]
  step 2 qfunc [ 2.54 3.24 2.53 2.57 ]
  step 3 qfunc [ 2.61 2.92 2.60 3.32 ]
  step 4 qfunc [ 3.03 3.09 2.80 3.33 ]
  step 5 qfunc [ 3.14 2.81 3.39 2.65 ]
16: score 0.742 initialq 3.26 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.24 3.02 2.38 2.24 ]
  step 1 qfunc [ 2.32 3.17 2.41 2.35 ]
  step 2 qfunc [ 2.36 2.59 2.64 3.27 ]
  step 3 qfunc [ 2.68 2.80 2.82 3.30 ]
  step 4 qfunc [ 3.25 3.09 3.36 2.97 ]
  step 5 qfunc [ 3.38 3.31 3.08 2.86 ]
17: score 1.000 initialq 3.26 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.07 2.49 2.25 ]
  step 1 qfunc [ 2.43 3.18 2.65 2.41 ]
  step 2 qfunc [ 2.59 2.60 2.89 3.31 ]
  step 3 qfunc [ 2.95 2.70 3.33 3.13 ]
  step 4 qfunc [ 3.21 3.08 3.25 3.36 ]
  step 5 qfunc [ 2.98 2.79 3.40 2.65 ]
18: score 1.000 initialq 3.25 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.25 3.10 2.34 2.17 ]
  step 1 qfunc [ 2.30 3.19 2.50 2.34 ]
  step 2 qfunc [ 2.44 2.66 2.53 3.26 ]
  step 3 qfunc [ 2.71 2.72 3.28 2.97 ]
  step 4 qfunc [ 3.10 3.15 3.07 3.30 ]
  step 5 qfunc [ 3.34 3.19 3.34 3.17 ]
19: score 0.500 initialq 3.25 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.27 3.05 2.41 2.19 ]
  step 1 qfunc [ 2.40 3.17 2.57 2.44 ]
  step 2 qfunc [ 2.52 2.66 2.59 3.26 ]
  step 3 qfunc [ 2.90 2.80 3.29 2.70 ]
  step 4 qfunc [ 3.31 3.22 2.67 3.27 ]
  step 5 qfunc [ 3.32 3.18 2.87 2.83 ]
20: score 1.000 initialq 3.25 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.26 3.10 2.47 2.25 ]
  step 1 qfunc [ 2.44 2.59 3.19 2.46 ]
  step 2 qfunc [ 2.55 3.23 2.52 2.59 ]
  step 3 qfunc [ 2.77 2.81 2.61 3.31 ]
  step 4 qfunc [ 3.13 2.96 2.98 3.33 ]
  step 5 qfunc [ 3.16 3.03 3.41 2.83 ]
21: score 1.000 initialq 3.25 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.19 3.07 2.31 2.18 ]
  step 1 qfunc [ 2.43 3.18 2.49 2.38 ]
  step 2 qfunc [ 2.50 2.52 2.53 3.33 ]
  step 3 qfunc [ 2.81 2.71 3.35 3.05 ]
  step 4 qfunc [ 2.96 3.15 2.63 3.36 ]
  step 5 qfunc [ 3.40 3.10 3.41 2.79 ]
22: score 1.000 initialq 3.25 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.25 3.06 2.33 2.27 ]
  step 1 qfunc [ 2.39 3.16 2.50 2.37 ]
  step 2 qfunc [ 2.48 2.56 2.52 3.29 ]
  step 3 qfunc [ 2.86 2.63 3.10 3.31 ]
  step 4 qfunc [ 3.04 2.99 3.38 2.71 ]
  step 5 qfunc [ 3.32 3.26 3.40 2.94 ]
23: score 1.000 initialq 3.22 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.24 3.01 2.34 2.23 ]
  step 1 qfunc [ 2.32 3.14 2.42 2.30 ]
  step 2 qfunc [ 2.24 2.48 2.43 3.25 ]
  step 3 qfunc [ 2.72 2.56 2.38 3.28 ]
  step 4 qfunc [ 3.14 2.72 3.28 2.77 ]
  step 5 qfunc [ 3.24 3.03 3.30 3.01 ]
24: score 1.000 initialq 3.22 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.06 2.39 2.24 ]
  step 1 qfunc [ 2.38 3.17 2.61 2.41 ]
  step 2 qfunc [ 2.36 2.42 2.63 3.33 ]
  step 3 qfunc [ 2.57 2.50 3.35 3.04 ]
  step 4 qfunc [ 2.91 2.98 2.78 3.36 ]
  step 5 qfunc [ 3.37 3.18 3.44 3.10 ]
25: score 0.750 initialq 3.22 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.26 2.96 2.34 2.26 ]
  step 1 qfunc [ 2.35 3.14 2.44 2.39 ]
  step 2 qfunc [ 2.27 2.43 2.53 3.29 ]
  step 3 qfunc [ 2.48 2.38 2.52 3.31 ]
  step 4 qfunc [ 2.62 2.58 3.41 3.02 ]
  step 5 qfunc [ 2.98 3.19 2.78 3.42 ]
26: score 1.000 initialq 3.21 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.27 3.04 2.48 2.32 ]
  step 1 qfunc [ 2.47 2.57 3.15 2.45 ]
  step 2 qfunc [ 2.55 3.19 2.55 2.55 ]
  step 3 qfunc [ 2.63 2.83 2.57 3.29 ]
  step 4 qfunc [ 2.93 2.76 3.06 3.30 ]
  step 5 qfunc [ 2.65 2.52 3.30 2.55 ]
27: score 1.000 initialq 3.21 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.27 3.05 2.38 2.23 ]
  step 1 qfunc [ 2.36 3.14 2.51 2.39 ]
  step 2 qfunc [ 2.43 2.60 2.60 3.28 ]
  step 3 qfunc [ 2.79 2.74 3.29 2.89 ]
  step 4 qfunc [ 3.16 3.06 3.16 3.32 ]
  step 5 qfunc [ 3.09 2.81 3.37 2.71 ]
28: score 0.750 initialq 3.20 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.28 2.97 2.39 2.21 ]
  step 1 qfunc [ 2.42 3.10 2.55 2.39 ]
  step 2 qfunc [ 2.58 2.59 2.57 3.21 ]
  step 3 qfunc [ 2.87 2.64 2.80 3.24 ]
  step 4 qfunc [ 3.26 2.92 3.20 3.02 ]
  step 5 qfunc [ 2.63 2.62 3.28 2.56 ]
29: score 0.999 initialq 3.20 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.24 3.03 2.47 2.29 ]
  step 1 qfunc [ 2.45 2.58 3.13 2.44 ]
  step 2 qfunc [ 2.61 3.18 2.57 2.61 ]
  step 3 qfunc [ 2.74 2.83 2.59 3.28 ]
  step 4 qfunc [ 2.93 2.93 2.73 3.29 ]
  step 5 qfunc [ 3.26 3.10 3.29 3.20 ]
30: score 1.000 initialq 3.18 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.25 2.45 3.05 2.23 ]
  step 1 qfunc [ 2.23 3.13 2.30 2.30 ]
  step 2 qfunc [ 2.51 3.16 2.46 2.54 ]
  step 3 qfunc [ 2.65 2.79 2.63 3.28 ]
  step 4 qfunc [ 3.15 2.84 3.11 3.29 ]
  step 5 qfunc [ 3.06 2.70 3.44 2.98 ]
31: score 0.993 initialq 3.17 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 2.96 2.37 2.26 ]
  step 1 qfunc [ 2.34 3.10 2.47 2.35 ]
  step 2 qfunc [ 2.38 2.63 2.50 3.24 ]
  step 3 qfunc [ 2.47 2.81 3.26 2.87 ]
  step 4 qfunc [ 2.84 2.74 2.69 3.27 ]
  step 5 qfunc [ 2.72 2.47 3.41 2.54 ]
32: score 0.500 initialq 3.17 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.19 2.97 2.31 2.18 ]
  step 1 qfunc [ 2.32 3.09 2.41 2.32 ]
  step 2 qfunc [ 2.35 3.25 2.42 2.60 ]
  step 3 qfunc [ 2.39 2.48 2.49 3.37 ]
  step 4 qfunc [ 2.88 2.68 3.40 2.64 ]
  step 5 qfunc [ 3.39 3.41 2.94 2.97 ]
33: score 0.988 initialq 3.17 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.26 2.92 2.36 2.26 ]
  step 1 qfunc [ 2.40 3.11 2.50 2.46 ]
  step 2 qfunc [ 2.54 2.44 3.30 2.59 ]
  step 3 qfunc [ 2.58 2.53 2.60 3.32 ]
  step 4 qfunc [ 2.78 2.45 2.72 3.34 ]
  step 5 qfunc [ 2.61 2.49 3.32 2.59 ]
34: score 0.504 initialq 3.17 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.24 3.00 2.41 2.26 ]
  step 1 qfunc [ 2.36 2.52 3.11 2.42 ]
  step 2 qfunc [ 2.48 3.14 2.50 2.49 ]
  step 3 qfunc [ 2.58 3.22 2.52 2.85 ]
  step 4 qfunc [ 3.01 3.10 2.55 3.30 ]
  step 5 qfunc [ 3.30 3.21 3.18 3.22 ]
35: score 0.999 initialq 3.16 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.20 2.98 2.38 2.25 ]
  step 1 qfunc [ 2.38 2.55 3.10 2.40 ]
  step 2 qfunc [ 2.47 3.15 2.48 2.58 ]
  step 3 qfunc [ 2.78 2.84 2.62 3.24 ]
  step 4 qfunc [ 3.06 2.94 2.63 3.26 ]
  step 5 qfunc [ 2.63 2.61 3.35 2.55 ]
36: score 1.000 initialq 3.16 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.28 2.98 2.40 2.23 ]
  step 1 qfunc [ 2.36 3.07 2.56 2.38 ]
  step 2 qfunc [ 2.56 2.67 2.72 3.26 ]
  step 3 qfunc [ 2.88 2.86 3.28 3.10 ]
  step 4 qfunc [ 3.22 3.07 3.30 3.30 ]
  step 5 qfunc [ 3.08 2.96 3.05 3.34 ]
37: score 0.750 initialq 3.16 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.22 2.92 2.38 2.23 ]
  step 1 qfunc [ 2.43 3.05 2.57 2.50 ]
  step 2 qfunc [ 2.65 2.72 2.60 3.19 ]
  step 3 qfunc [ 2.82 2.82 2.97 3.22 ]
  step 4 qfunc [ 3.30 3.10 3.19 2.60 ]
  step 5 qfunc [ 2.62 2.62 3.30 2.55 ]
38: score 0.750 initialq 3.15 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.28 2.86 2.41 2.25 ]
  step 1 qfunc [ 2.37 3.08 2.49 2.40 ]
  step 2 qfunc [ 2.35 2.55 2.49 3.25 ]
  step 3 qfunc [ 2.47 2.50 2.54 3.27 ]
  step 4 qfunc [ 2.65 2.49 3.32 3.26 ]
  step 5 qfunc [ 3.34 3.14 2.92 3.28 ]
39: score 0.750 initialq 3.15 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.28 2.91 2.43 2.28 ]
  step 1 qfunc [ 2.39 3.07 2.51 2.38 ]
  step 2 qfunc [ 2.46 2.49 2.53 3.21 ]
  step 3 qfunc [ 2.56 2.47 2.75 3.24 ]
  step 4 qfunc [ 3.02 2.90 3.34 2.87 ]
  step 5 qfunc [ 3.36 3.29 3.14 3.28 ]
40: score 1.000 initialq 3.14 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.23 2.95 2.31 2.24 ]
  step 1 qfunc [ 2.31 3.07 2.35 2.31 ]
  step 2 qfunc [ 2.47 2.53 2.38 3.17 ]
  step 3 qfunc [ 2.82 2.70 3.20 2.93 ]
  step 4 qfunc [ 2.38 2.70 2.32 3.21 ]
  step 5 qfunc [ 3.29 3.11 3.47 2.84 ]
41: score 0.750 initialq 3.13 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.21 2.90 2.29 2.16 ]
  step 1 qfunc [ 2.32 3.05 2.44 2.38 ]
  step 2 qfunc [ 2.30 2.41 2.42 3.21 ]
  step 3 qfunc [ 2.40 2.55 2.50 3.23 ]
  step 4 qfunc [ 2.93 3.09 3.41 3.19 ]
  step 5 qfunc [ 3.36 3.28 3.22 3.43 ]
42: score 0.500 initialq 3.12 opt [[1 1 3 1 0 2]]
  step 0 qfunc [ 2.26 2.85 2.41 2.25 ]
  step 1 qfunc [ 2.35 3.03 2.48 2.35 ]
  step 2 qfunc [ 2.33 2.57 2.47 3.21 ]
  step 3 qfunc [ 2.81 3.24 2.19 2.84 ]
  step 4 qfunc [ 3.27 2.56 2.54 2.97 ]
  step 5 qfunc [ 2.62 2.53 3.33 2.61 ]
43: score 1.000 initialq 3.12 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.19 2.91 2.29 2.23 ]
  step 1 qfunc [ 2.27 3.04 2.38 2.34 ]
  step 2 qfunc [ 2.38 2.42 2.49 3.20 ]
  step 3 qfunc [ 2.65 2.55 2.90 3.22 ]
  step 4 qfunc [ 2.98 2.81 3.32 2.72 ]
  step 5 qfunc [ 3.22 3.31 3.34 2.93 ]
44: score 1.000 initialq 3.12 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.23 2.91 2.40 2.24 ]
  step 1 qfunc [ 2.38 2.43 3.05 2.40 ]
  step 2 qfunc [ 2.51 3.10 2.53 2.52 ]
  step 3 qfunc [ 2.59 2.79 2.54 3.19 ]
  step 4 qfunc [ 2.76 2.68 2.64 3.20 ]
  step 5 qfunc [ 2.59 2.44 3.47 2.56 ]
45: score 0.500 initialq 3.12 opt [[1 1 1 3 0 2]]
  step 0 qfunc [ 2.25 2.92 2.37 2.23 ]
  step 1 qfunc [ 2.37 3.06 2.47 2.40 ]
  step 2 qfunc [ 2.29 3.21 2.53 2.67 ]
  step 3 qfunc [ 2.59 2.62 2.83 3.33 ]
  step 4 qfunc [ 3.36 3.07 3.36 3.16 ]
  step 5 qfunc [ 2.55 2.49 3.38 2.56 ]
46: score 0.996 initialq 3.11 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.31 2.91 2.49 2.30 ]
  step 1 qfunc [ 2.41 2.51 3.05 2.41 ]
  step 2 qfunc [ 2.50 3.09 2.51 2.52 ]
  step 3 qfunc [ 2.54 2.86 2.53 3.21 ]
  step 4 qfunc [ 2.66 2.83 2.65 3.22 ]
  step 5 qfunc [ 2.92 2.40 3.29 2.67 ]
47: score 0.750 initialq 3.11 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.30 2.81 2.44 2.31 ]
  step 1 qfunc [ 2.44 3.02 2.52 2.43 ]
  step 2 qfunc [ 2.39 2.45 2.53 3.22 ]
  step 3 qfunc [ 2.39 2.42 2.58 3.24 ]
  step 4 qfunc [ 3.37 3.22 3.38 3.08 ]
  step 5 qfunc [ 3.37 3.40 3.05 3.19 ]
48: score 0.539 initialq 3.11 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.25 2.86 2.36 2.24 ]
  step 1 qfunc [ 2.38 2.42 3.04 2.38 ]
  step 2 qfunc [ 2.49 3.11 2.55 2.54 ]
  step 3 qfunc [ 2.48 3.13 2.42 2.45 ]
  step 4 qfunc [ 2.54 2.58 2.43 3.19 ]
  step 5 qfunc [ 3.20 2.88 2.74 2.79 ]
49: score 0.750 initialq 3.08 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.31 2.85 2.44 2.29 ]
  step 1 qfunc [ 2.40 2.99 2.56 2.44 ]
  step 2 qfunc [ 2.43 2.55 2.56 3.23 ]
  step 3 qfunc [ 2.54 2.52 2.78 3.25 ]
  step 4 qfunc [ 2.80 2.51 3.37 3.06 ]
  step 5 qfunc [ 3.34 3.20 3.32 3.40 ]
50: score 0.753 initialq 3.07 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.27 2.83 2.43 2.25 ]
  step 1 qfunc [ 2.40 2.96 2.53 2.41 ]
  step 2 qfunc [ 2.40 2.44 2.61 3.19 ]
  step 3 qfunc [ 2.42 2.56 2.89 3.22 ]
  step 4 qfunc [ 2.81 2.86 3.35 2.98 ]
  step 5 qfunc [ 3.37 3.26 3.14 3.04 ]
51: score 0.518 initialq 3.06 opt [[1 1 1 3 2 0]]
  step 0 qfunc [ 2.22 2.77 2.31 2.23 ]
  step 1 qfunc [ 2.34 2.98 2.41 2.35 ]
  step 2 qfunc [ 2.30 3.14 2.36 2.60 ]
  step 3 qfunc [ 2.32 2.37 2.31 3.23 ]
  step 4 qfunc [ 2.93 2.76 3.25 2.65 ]
  step 5 qfunc [ 3.26 2.81 2.94 2.71 ]
52: score 0.755 initialq 3.04 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.19 2.80 2.32 2.17 ]
  step 1 qfunc [ 2.30 2.95 2.45 2.31 ]
  step 2 qfunc [ 2.34 2.35 2.47 3.13 ]
  step 3 qfunc [ 2.59 2.49 2.69 3.16 ]
  step 4 qfunc [ 3.18 2.63 3.18 2.94 ]
  step 5 qfunc [ 3.14 2.98 3.20 3.20 ]
53: score 0.770 initialq 3.02 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.28 2.75 2.44 2.28 ]
  step 1 qfunc [ 2.43 2.52 2.88 2.43 ]
  step 2 qfunc [ 2.53 2.93 2.52 2.55 ]
  step 3 qfunc [ 2.55 3.03 2.60 2.76 ]
  step 4 qfunc [ 2.59 2.60 2.76 3.13 ]
  step 5 qfunc [ 2.92 2.81 3.13 3.01 ]
54: score 0.750 initialq 3.02 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.74 2.42 2.28 ]
  step 1 qfunc [ 2.39 2.93 2.51 2.40 ]
  step 2 qfunc [ 2.37 3.17 2.54 2.52 ]
  step 3 qfunc [ 2.43 2.41 2.55 3.32 ]
  step 4 qfunc [ 2.60 2.66 3.05 3.34 ]
  step 5 qfunc [ 2.52 2.47 3.45 2.53 ]
55: score 0.794 initialq 3.02 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.27 2.53 2.80 2.29 ]
  step 1 qfunc [ 2.35 2.93 2.37 2.40 ]
  step 2 qfunc [ 2.57 2.98 2.58 2.58 ]
  step 3 qfunc [ 2.66 3.06 2.60 2.77 ]
  step 4 qfunc [ 2.79 2.78 2.67 3.15 ]
  step 5 qfunc [ 3.04 2.85 3.16 3.09 ]
56: score 0.500 initialq 3.01 opt [[1 1 3 1 2 1]]
  step 0 qfunc [ 2.24 2.76 2.33 2.24 ]
  step 1 qfunc [ 2.36 2.92 2.45 2.38 ]
  step 2 qfunc [ 2.36 2.48 2.47 3.07 ]
  step 3 qfunc [ 2.48 3.10 2.38 2.68 ]
  step 4 qfunc [ 2.80 2.81 3.16 2.78 ]
  step 5 qfunc [ 3.15 3.18 2.55 3.10 ]
57: score 0.750 initialq 3.00 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.76 2.46 2.28 ]
  step 1 qfunc [ 2.42 2.91 2.56 2.45 ]
  step 2 qfunc [ 2.42 3.12 2.63 2.64 ]
  step 3 qfunc [ 2.49 2.52 2.67 3.30 ]
  step 4 qfunc [ 2.75 2.77 3.31 3.32 ]
  step 5 qfunc [ 2.57 2.48 3.34 2.55 ]
58: score 0.750 initialq 2.99 opt [[1 2 1 3 3 0]]
  step 0 qfunc [ 2.23 2.83 2.38 2.21 ]
  step 1 qfunc [ 2.34 2.52 2.93 2.37 ]
  step 2 qfunc [ 2.50 2.97 2.49 2.54 ]
  step 3 qfunc [ 2.58 2.81 2.70 3.06 ]
  step 4 qfunc [ 2.77 2.78 3.01 3.08 ]
  step 5 qfunc [ 3.09 2.52 3.07 2.67 ]
59: score 0.500 initialq 2.99 opt [[2 1 1 1 1 3]]
  step 0 qfunc [ 2.25 2.45 2.77 2.26 ]
  step 1 qfunc [ 2.29 2.90 2.35 2.32 ]
  step 2 qfunc [ 2.53 2.96 2.55 2.55 ]
  step 3 qfunc [ 2.67 3.03 2.58 2.77 ]
  step 4 qfunc [ 2.94 3.08 2.56 3.05 ]
  step 5 qfunc [ 2.66 2.57 2.60 3.10 ]
60: score 0.500 initialq 2.98 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.25 2.78 2.42 2.26 ]
  step 1 qfunc [ 2.39 2.51 2.91 2.42 ]
  step 2 qfunc [ 2.48 2.95 2.50 2.53 ]
  step 3 qfunc [ 2.49 3.05 2.57 2.74 ]
  step 4 qfunc [ 2.64 2.62 2.68 3.15 ]
  step 5 qfunc [ 3.15 3.08 3.11 2.98 ]
61: score 0.961 initialq 2.98 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.27 2.80 2.39 2.25 ]
  step 1 qfunc [ 2.37 2.92 2.54 2.41 ]
  step 2 qfunc [ 2.38 2.74 2.57 3.13 ]
  step 3 qfunc [ 2.60 2.68 3.15 2.81 ]
  step 4 qfunc [ 2.87 2.90 2.82 3.16 ]
  step 5 qfunc [ 2.51 2.42 3.15 2.52 ]
62: score 0.750 initialq 2.98 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.24 2.66 2.32 2.23 ]
  step 1 qfunc [ 2.31 2.88 2.41 2.32 ]
  step 2 qfunc [ 2.31 2.44 2.41 3.04 ]
  step 3 qfunc [ 2.41 2.44 2.41 3.07 ]
  step 4 qfunc [ 2.67 2.87 3.21 2.57 ]
  step 5 qfunc [ 3.10 3.22 2.89 2.60 ]
63: score 0.849 initialq 2.97 opt [[1 1 3 3 1 2]]
  step 0 qfunc [ 2.33 2.49 2.46 2.31 ]
  step 1 qfunc [ 2.43 2.81 2.53 2.45 ]
  step 2 qfunc [ 2.42 2.46 2.54 3.06 ]
  step 3 qfunc [ 2.36 2.53 2.56 3.09 ]
  step 4 qfunc [ 2.55 3.19 3.12 2.67 ]
  step 5 qfunc [ 2.42 2.31 3.31 2.50 ]
64: score 1.000 initialq 2.95 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.30 2.59 2.47 2.28 ]
  step 1 qfunc [ 2.39 2.84 2.57 2.44 ]
  step 2 qfunc [ 2.25 2.45 2.55 3.05 ]
  step 3 qfunc [ 2.34 2.37 2.66 3.08 ]
  step 4 qfunc [ 2.67 2.68 3.10 2.96 ]
  step 5 qfunc [ 3.13 3.00 3.01 3.07 ]
65: score 0.500 initialq 2.94 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.26 2.62 2.43 2.29 ]
  step 1 qfunc [ 2.42 2.80 2.54 2.43 ]
  step 2 qfunc [ 2.41 2.97 2.57 2.73 ]
  step 3 qfunc [ 2.34 2.54 2.56 3.03 ]
  step 4 qfunc [ 2.92 2.96 3.07 2.66 ]
  step 5 qfunc [ 3.02 3.09 2.76 2.94 ]
66: score 0.996 initialq 2.93 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 2.70 2.49 2.28 ]
  step 1 qfunc [ 2.41 2.86 2.59 2.44 ]
  step 2 qfunc [ 2.36 2.55 2.57 3.04 ]
  step 3 qfunc [ 2.48 2.72 3.07 2.68 ]
  step 4 qfunc [ 2.83 2.95 2.58 3.08 ]
  step 5 qfunc [ 3.07 2.53 3.09 2.66 ]
67: score 0.991 initialq 2.93 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.33 2.49 2.67 2.32 ]
  step 1 qfunc [ 2.39 2.84 2.39 2.40 ]
  step 2 qfunc [ 2.55 2.90 2.55 2.56 ]
  step 3 qfunc [ 2.59 2.68 2.51 3.00 ]
  step 4 qfunc [ 2.66 2.69 2.57 3.01 ]
  step 5 qfunc [ 2.57 2.49 3.30 2.54 ]
68: score 0.500 initialq 2.93 opt [[1 1 1 3 2 0]]
  step 0 qfunc [ 2.26 2.69 2.41 2.28 ]
  step 1 qfunc [ 2.39 2.84 2.51 2.40 ]
  step 2 qfunc [ 2.39 3.04 2.58 2.58 ]
  step 3 qfunc [ 2.39 2.43 2.62 3.13 ]
  step 4 qfunc [ 2.78 2.96 3.14 2.82 ]
  step 5 qfunc [ 3.16 3.07 3.15 2.94 ]
69: score 0.513 initialq 2.92 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.24 2.67 2.38 2.23 ]
  step 1 qfunc [ 2.36 2.80 2.47 2.36 ]
  step 2 qfunc [ 2.35 2.99 2.52 2.60 ]
  step 3 qfunc [ 2.37 2.51 2.62 3.14 ]
  step 4 qfunc [ 2.66 2.67 3.15 2.71 ]
  step 5 qfunc [ 3.13 3.17 2.91 3.01 ]
70: score 0.751 initialq 2.91 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.26 2.53 2.37 2.25 ]
  step 1 qfunc [ 2.34 2.76 2.47 2.36 ]
  step 2 qfunc [ 2.36 2.40 2.59 2.93 ]
  step 3 qfunc [ 2.36 2.47 2.59 2.96 ]
  step 4 qfunc [ 3.01 2.78 3.07 2.63 ]
  step 5 qfunc [ 3.09 2.96 2.79 2.69 ]
71: score 0.750 initialq 2.90 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.25 2.61 2.38 2.24 ]
  step 1 qfunc [ 2.35 2.80 2.43 2.36 ]
  step 2 qfunc [ 2.34 2.52 2.43 3.01 ]
  step 3 qfunc [ 2.62 2.44 2.48 3.04 ]
  step 4 qfunc [ 2.97 2.57 3.21 2.61 ]
  step 5 qfunc [ 3.23 3.17 2.95 3.01 ]
72: score 0.750 initialq 2.90 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.25 2.46 2.63 2.26 ]
  step 1 qfunc [ 2.31 2.79 2.31 2.34 ]
  step 2 qfunc [ 2.49 2.86 2.48 2.52 ]
  step 3 qfunc [ 2.54 2.98 2.51 2.74 ]
  step 4 qfunc [ 2.71 2.66 2.67 3.11 ]
  step 5 qfunc [ 2.99 2.83 3.11 2.90 ]
73: score 0.500 initialq 2.90 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.24 2.46 2.60 2.28 ]
  step 1 qfunc [ 2.29 2.81 2.31 2.34 ]
  step 2 qfunc [ 2.50 2.89 2.48 2.52 ]
  step 3 qfunc [ 2.51 2.70 2.46 2.97 ]
  step 4 qfunc [ 2.83 2.52 2.43 2.99 ]
  step 5 qfunc [ 2.56 2.48 3.16 2.54 ]
74: score 0.750 initialq 2.87 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.30 2.60 2.47 2.29 ]
  step 1 qfunc [ 2.41 2.47 2.79 2.41 ]
  step 2 qfunc [ 2.52 2.85 2.50 2.53 ]
  step 3 qfunc [ 2.56 2.94 2.55 2.69 ]
  step 4 qfunc [ 2.61 2.66 2.63 3.05 ]
  step 5 qfunc [ 2.74 2.66 2.80 3.04 ]
75: score 0.750 initialq 2.84 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.28 2.61 2.37 2.28 ]
  step 1 qfunc [ 2.36 2.80 2.43 2.37 ]
  step 2 qfunc [ 2.31 2.43 2.42 3.00 ]
  step 3 qfunc [ 2.39 2.43 2.43 3.03 ]
  step 4 qfunc [ 2.82 2.55 3.15 2.72 ]
  step 5 qfunc [ 3.12 3.17 3.00 2.88 ]
76: score 0.500 initialq 2.84 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.58 2.42 2.30 ]
  step 1 qfunc [ 2.37 2.73 2.50 2.39 ]
  step 2 qfunc [ 2.37 2.88 2.53 2.48 ]
  step 3 qfunc [ 2.40 2.65 2.56 2.99 ]
  step 4 qfunc [ 2.58 2.60 2.98 3.00 ]
  step 5 qfunc [ 2.52 2.40 3.02 2.52 ]
77: score 0.888 initialq 2.83 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.27 2.62 2.41 2.24 ]
  step 1 qfunc [ 2.37 2.45 2.75 2.39 ]
  step 2 qfunc [ 2.49 2.80 2.49 2.50 ]
  step 3 qfunc [ 2.51 2.81 2.52 2.91 ]
  step 4 qfunc [ 2.61 2.77 2.60 2.92 ]
  step 5 qfunc [ 2.61 2.32 3.41 2.56 ]
78: score 0.750 initialq 2.83 opt [[1 1 3 2 1 3]]
  step 0 qfunc [ 2.24 2.52 2.44 2.24 ]
  step 1 qfunc [ 2.33 2.70 2.49 2.39 ]
  step 2 qfunc [ 2.45 2.56 2.60 2.85 ]
  step 3 qfunc [ 2.50 2.59 2.89 2.70 ]
  step 4 qfunc [ 2.65 2.91 2.51 2.72 ]
  step 5 qfunc [ 2.75 2.56 2.51 2.95 ]
79: score 0.750 initialq 2.82 opt [[1 3 1 3 2 3]]
  step 0 qfunc [ 2.28 2.44 2.39 2.30 ]
  step 1 qfunc [ 2.38 2.39 2.48 2.73 ]
  step 2 qfunc [ 2.33 2.81 2.48 2.35 ]
  step 3 qfunc [ 2.22 2.35 2.44 2.93 ]
  step 4 qfunc [ 2.55 2.21 2.96 2.64 ]
  step 5 qfunc [ 2.83 2.59 2.52 2.97 ]
80: score 0.500 initialq 2.80 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.28 2.58 2.43 2.28 ]
  step 1 qfunc [ 2.41 2.48 2.72 2.41 ]
  step 2 qfunc [ 2.52 2.78 2.48 2.51 ]
  step 3 qfunc [ 2.61 2.87 2.50 2.69 ]
  step 4 qfunc [ 2.69 2.71 2.49 2.93 ]
  step 5 qfunc [ 2.93 2.93 2.61 2.77 ]
81: score 0.526 initialq 2.77 opt [[1 1 1 1 3 2]]
  step 0 qfunc [ 2.30 2.43 2.40 2.31 ]
  step 1 qfunc [ 2.38 2.53 2.47 2.40 ]
  step 2 qfunc [ 2.35 2.81 2.47 2.44 ]
  step 3 qfunc [ 2.35 2.87 2.48 2.59 ]
  step 4 qfunc [ 2.27 2.39 2.44 2.89 ]
  step 5 qfunc [ 2.55 2.44 2.91 2.54 ]
82: score 0.712 initialq 2.76 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.20 2.37 2.52 2.24 ]
  step 1 qfunc [ 2.29 2.72 2.32 2.37 ]
  step 2 qfunc [ 2.48 2.80 2.45 2.49 ]
  step 3 qfunc [ 2.45 2.91 2.44 2.46 ]
  step 4 qfunc [ 2.46 2.35 2.44 2.97 ]
  step 5 qfunc [ 2.79 2.00 2.97 2.64 ]
83: score 0.500 initialq 2.73 opt [[2 1 3 1 3 0]]
  step 0 qfunc [ 2.30 2.43 2.45 2.29 ]
  step 1 qfunc [ 2.34 2.65 2.37 2.38 ]
  step 2 qfunc [ 2.49 2.35 2.50 2.73 ]
  step 3 qfunc [ 2.53 2.76 2.50 2.55 ]
  step 4 qfunc [ 2.76 2.60 2.35 2.83 ]
  step 5 qfunc [ 2.84 2.48 2.06 2.58 ]
84: score 0.750 initialq 2.72 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.29 2.45 2.44 2.28 ]
  step 1 qfunc [ 2.42 2.45 2.49 2.44 ]
  step 2 qfunc [ 2.49 2.51 2.49 2.49 ]
  step 3 qfunc [ 2.51 2.55 2.49 2.53 ]
  step 4 qfunc [ 2.55 2.54 2.56 2.62 ]
  step 5 qfunc [ 2.57 2.52 2.59 2.65 ]
85: score 0.750 initialq 2.67 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.28 2.40 2.39 2.26 ]
  step 1 qfunc [ 2.37 2.39 2.44 2.39 ]
  step 2 qfunc [ 2.43 2.46 2.44 2.44 ]
  step 3 qfunc [ 2.43 2.49 2.46 2.47 ]
  step 4 qfunc [ 2.46 2.48 2.49 2.54 ]
  step 5 qfunc [ 2.47 2.52 2.51 2.56 ]
86: score 0.853 initialq 2.65 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.30 2.47 2.44 2.32 ]
  step 1 qfunc [ 2.44 2.47 2.51 2.45 ]
  step 2 qfunc [ 2.51 2.52 2.50 2.50 ]
  step 3 qfunc [ 2.52 2.60 2.49 2.54 ]
  step 4 qfunc [ 2.55 2.50 2.51 2.69 ]
  step 5 qfunc [ 2.54 2.36 2.70 2.54 ]
87: score 0.750 initialq 2.62 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.30 2.46 2.46 2.30 ]
  step 1 qfunc [ 2.40 2.43 2.51 2.42 ]
  step 2 qfunc [ 2.51 2.53 2.50 2.51 ]
  step 3 qfunc [ 2.52 2.62 2.50 2.53 ]
  step 4 qfunc [ 2.54 2.59 2.54 2.70 ]
  step 5 qfunc [ 2.57 2.55 2.58 2.71 ]
88: score 0.536 initialq 2.58 opt [[1 2 1 0 1 3]]
  step 0 qfunc [ 2.32 2.51 2.49 2.32 ]
  step 1 qfunc [ 2.45 2.46 2.56 2.48 ]
  step 2 qfunc [ 2.55 2.56 2.56 2.56 ]
  step 3 qfunc [ 2.57 2.57 2.57 2.56 ]
  step 4 qfunc [ 2.57 2.60 2.54 2.58 ]
  step 5 qfunc [ 2.58 2.52 2.52 2.65 ]
89: score 1.000 initialq 2.56 opt [[1 2 2 1 3 3]]
  step 0 qfunc [ 2.24 2.41 2.37 2.27 ]
  step 1 qfunc [ 2.35 2.37 2.46 2.38 ]
  step 2 qfunc [ 2.44 2.46 2.48 2.44 ]
  step 3 qfunc [ 2.47 2.49 2.48 2.46 ]
  step 4 qfunc [ 2.49 2.50 2.50 2.52 ]
  step 5 qfunc [ 2.50 2.47 2.53 2.55 ]
90: score 0.736 initialq 2.55 opt [[2 1 1 1 3 3]]
  step 0 qfunc [ 2.31 2.49 2.49 2.32 ]
  step 1 qfunc [ 2.43 2.54 2.41 2.43 ]
  step 2 qfunc [ 2.54 2.55 2.55 2.54 ]
  step 3 qfunc [ 2.54 2.60 2.56 2.52 ]
  step 4 qfunc [ 2.56 2.64 2.59 2.67 ]
  step 5 qfunc [ 2.58 2.55 2.61 2.70 ]
91: score 0.750 initialq 2.54 opt [[1 2 1 1 2 1]]
  step 0 qfunc [ 2.26 2.37 2.34 2.26 ]
  step 1 qfunc [ 2.35 2.38 2.41 2.38 ]
  step 2 qfunc [ 2.41 2.42 2.40 2.41 ]
  step 3 qfunc [ 2.42 2.44 2.43 2.41 ]
  step 4 qfunc [ 2.44 2.45 2.47 2.44 ]
  step 5 qfunc [ 2.46 2.50 2.43 2.46 ]
92: score 0.500 initialq 2.51 opt [[1 2 1 3 0 3]]
  step 0 qfunc [ 2.29 2.45 2.42 2.28 ]
  step 1 qfunc [ 2.42 2.42 2.50 2.45 ]
  step 2 qfunc [ 2.50 2.51 2.49 2.50 ]
  step 3 qfunc [ 2.52 2.54 2.45 2.55 ]
  step 4 qfunc [ 2.60 2.49 2.46 2.57 ]
  step 5 qfunc [ 2.62 2.50 2.48 2.66 ]
93: score 0.250 initialq 2.49 opt [[1 2 0 0 0 0]]
  step 0 qfunc [ 2.30 2.46 2.46 2.30 ]
  step 1 qfunc [ 2.43 2.42 2.51 2.44 ]
  step 2 qfunc [ 2.52 2.48 2.51 2.52 ]
  step 3 qfunc [ 2.53 2.50 2.52 2.53 ]
  step 4 qfunc [ 2.54 2.52 2.53 2.53 ]
  step 5 qfunc [ 2.54 2.53 2.54 2.53 ]
94: score 0.646 initialq 2.48 opt [[2 1 0 1 1 0]]
  step 0 qfunc [ 2.30 2.43 2.43 2.29 ]
  step 1 qfunc [ 2.40 2.48 2.40 2.44 ]
  step 2 qfunc [ 2.49 2.49 2.49 2.48 ]
  step 3 qfunc [ 2.49 2.51 2.49 2.48 ]
  step 4 qfunc [ 2.51 2.52 2.44 2.51 ]
  step 5 qfunc [ 2.55 2.49 2.50 2.53 ]
95: score 0.500 initialq 2.46 opt [[1 2 1 1 1 0]]
  step 0 qfunc [ 2.30 2.44 2.43 2.30 ]
  step 1 qfunc [ 2.40 2.41 2.49 2.43 ]
  step 2 qfunc [ 2.48 2.49 2.48 2.49 ]
  step 3 qfunc [ 2.48 2.50 2.49 2.48 ]
  step 4 qfunc [ 2.50 2.51 2.50 2.50 ]
  step 5 qfunc [ 2.52 2.50 2.51 2.52 ]
96: score 0.750 initialq 2.44 opt [[1 2 1 2 1 1]]
  step 0 qfunc [ 2.26 2.42 2.38 2.28 ]
  step 1 qfunc [ 2.37 2.40 2.46 2.40 ]
  step 2 qfunc [ 2.46 2.46 2.46 2.46 ]
  step 3 qfunc [ 2.46 2.46 2.46 2.46 ]
  step 4 qfunc [ 2.47 2.48 2.46 2.46 ]
  step 5 qfunc [ 2.48 2.49 2.47 2.47 ]
97: score 0.828 initialq 2.42 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.22 2.38 2.36 2.24 ]
  step 1 qfunc [ 2.33 2.36 2.43 2.36 ]
  step 2 qfunc [ 2.43 2.44 2.42 2.43 ]
  step 3 qfunc [ 2.45 2.43 2.45 2.45 ]
  step 4 qfunc [ 2.47 2.41 2.46 2.48 ]
  step 5 qfunc [ 2.01 1.87 2.76 2.01 ]
98: score 0.750 initialq 2.41 opt [[1 2 2 1 3 0]]
  step 0 qfunc [ 2.24 2.36 2.34 2.25 ]
  step 1 qfunc [ 2.34 2.35 2.41 2.34 ]
  step 2 qfunc [ 2.40 2.42 2.43 2.39 ]
  step 3 qfunc [ 2.42 2.46 2.44 2.43 ]
  step 4 qfunc [ 2.48 2.47 2.45 2.48 ]
  step 5 qfunc [ 2.50 2.48 2.44 2.47 ]
99: score 0.750 initialq 2.41 opt [[1 2 1 1 2 1]]
  step 0 qfunc [ 2.30 2.39 2.38 2.29 ]
  step 1 qfunc [ 2.37 2.38 2.43 2.39 ]
  step 2 qfunc [ 2.43 2.44 2.42 2.43 ]
  step 3 qfunc [ 2.43 2.45 2.44 2.43 ]
  step 4 qfunc [ 2.44 2.46 2.46 2.43 ]
  step 5 qfunc [ 2.47 2.48 2.44 2.46 ]

In [39]:
'''
Let's look for the cases where the policy is correct until the last step, and the last step is wrong.
And good models.
'''
good2 = [5,6,8,9,10,11,12,13,14,15,17,18,20,21,22,23,24,26,27,29,30,31,33,35,36,40,43,44,46,61,66,67,77,97] # last steps end up being 2
good3 = [4,89] # last steps end up being 3
final2 = [0,1,3,7,16,25,38,39,41,47,49,50,52,58,62,64,70,71,75,98] # last step should've been 2
final3 = [] # last step should've been 3

six.print_(len(good2))
six.print_(len(final2))

# now we can do a preliminary robust matrix evaluation for the good models and the last step should be 2 models
model_ixs = np.concatenate([good2, final2])
six.print_(model_ixs)
rmat = np.zeros((model_ixs.shape[0],model_ixs.shape[0]))
# rmat[rmodel,cmodel] = the value of rmodel's policy in cmodel
for pix in six.moves.range(model_ixs.shape[0]):
    policy = sorted_opts[model_ixs[pix],0,:]
    last_act = policy[-1]
    #six.print_(last_act)
    for eix in six.moves.range(model_ixs.shape[0]):
        # qfunc of last step
        last_q = sorted_qfuncs[model_ixs[eix],-1,:]
        #six.print_(last_q)
        rmat[pix,eix] = last_q[last_act]
#six.print_(rmat)
six.print_(np.mean(rmat,axis=1))
six.print_(np.min(rmat,axis=1))


34
20
[ 5  6  8  9 10 11 12 13 14 15 17 18 20 21 22 23 24 26 27 29 30 31 33 35 36
 40 43 44 46 61 66 67 77 97  0  1  3  7 16 25 38 39 41 47 49 50 52 58 62 64
 70 71 75 98]
[ 3.24421266  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266
  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266
  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266
  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266
  2.85016024  3.24421266  3.24421266  3.24421266  3.24421266  3.24421266
  3.24421266  3.24421266  3.24421266  3.24421266  3.0894189   3.0894189
  3.0894189   3.0894189   3.0894189   2.85016024  3.0894189   3.0894189
  2.85016024  2.90954679  2.85016024  3.0894189   2.85016024  3.0894189
  2.90954679  3.0894189   3.0894189   3.0894189   2.90954679  3.0894189 ]
[ 2.44229973  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973
  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973
  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973
  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973
  2.01435482  2.44229973  2.44229973  2.44229973  2.44229973  2.44229973
  2.44229973  2.44229973  2.44229973  2.44229973  2.01007669  2.01007669
  2.01007669  2.01007669  2.01007669  2.01435482  2.01007669  2.01007669
  2.01435482  1.87256843  2.01435482  2.01007669  2.01435482  2.01007669
  1.87256843  2.01007669  2.01007669  2.01007669  1.87256843  2.01007669]

In [24]:
'''
Now let's look at the extended version with all 100 models for no dropout.
'''
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')

data71 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runB.npz')

vloss = np.concatenate([data11['vloss'],data12['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])[:,0,:]
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])

# each row is a policy
evals = np.vstack([data71['evals'],data72['evals']]).T
#six.print_(evals)

eval_avg = np.mean(evals,axis=1)
sorted_avg_ix = np.flip(np.argsort(eval_avg), 0)

eval_min = np.min(evals,axis=1)
sorted_min_ix = np.flip(np.argsort(eval_min), 0)

eval_per = np.percentile(evals,0.25,axis=1)
sorted_per_ix = np.flip(np.argsort(eval_per), 0)

#six.print_(sorted_avg_ix)
#six.print_(sorted_min_ix)

for r in six.moves.range(evals.shape[0]):
    ix = sorted_per_ix[r]
    six.print_('model_ix {:2d}: policy {} score {:.3f} initialq {:.3f} eval_avg {:.3f} eval_min {:.3f} per {:.3f}'.format(
        ix, opts[ix,:], scores[ix], initialq[ix], eval_avg[ix], eval_min[ix], eval_per[ix]))


model_ix  9: policy [1 3 1 1 3 2] score 0.706 initialq 3.841 eval_avg 3.940 eval_min 3.776 per 3.782
model_ix 29: policy [1 3 1 1 3 2] score 0.500 initialq 3.935 eval_avg 3.940 eval_min 3.762 per 3.774
model_ix 60: policy [1 3 1 3 2 3] score 0.682 initialq 3.936 eval_avg 3.815 eval_min 2.993 per 3.009
model_ix  7: policy [1 2 3 1 3 2] score 0.750 initialq 3.949 eval_avg 3.454 eval_min 2.985 per 2.985
model_ix 20: policy [1 2 3 1 3 2] score 0.750 initialq 3.906 eval_avg 3.454 eval_min 2.984 per 2.984
model_ix 61: policy [1 2 3 1 3 2] score 0.714 initialq 3.922 eval_avg 3.454 eval_min 2.984 per 2.984
model_ix 26: policy [1 2 3 1 3 2] score 0.839 initialq 3.953 eval_avg 3.453 eval_min 2.982 per 2.983
model_ix 42: policy [1 2 3 1 3 2] score 0.750 initialq 3.963 eval_avg 3.454 eval_min 2.981 per 2.982
model_ix 84: policy [1 2 0 1 3 3] score 0.766 initialq 3.920 eval_avg 3.680 eval_min 2.912 per 2.922
model_ix 52: policy [1 2 0 1 3 3] score 0.773 initialq 3.956 eval_avg 3.679 eval_min 2.913 per 2.919
model_ix 16: policy [1 2 1 3 3 2] score 0.977 initialq 3.956 eval_avg 3.517 eval_min 2.901 per 2.912
model_ix 64: policy [1 1 2 0 3 2] score 0.750 initialq 3.950 eval_avg 3.314 eval_min 2.905 per 2.910
model_ix 14: policy [1 2 1 3 3 2] score 0.500 initialq 3.939 eval_avg 3.518 eval_min 2.899 per 2.910
model_ix 45: policy [1 1 2 0 3 2] score 0.750 initialq 3.955 eval_avg 3.315 eval_min 2.899 per 2.905
model_ix  0: policy [1 2 1 3 3 2] score 0.987 initialq 3.936 eval_avg 3.519 eval_min 2.899 per 2.905
model_ix 44: policy [2 1 3 1 3 2] score 0.542 initialq 3.934 eval_avg 3.488 eval_min 2.868 per 2.891
model_ix 55: policy [1 3 1 3 3 2] score 0.750 initialq 3.920 eval_avg 3.846 eval_min 2.730 per 2.890
model_ix 46: policy [1 3 1 3 3 2] score 0.750 initialq 3.944 eval_avg 3.846 eval_min 2.734 per 2.889
model_ix 11: policy [1 3 1 3 3 2] score 0.753 initialq 3.924 eval_avg 3.846 eval_min 2.728 per 2.889
model_ix  6: policy [1 1 2 3 2 3] score 0.924 initialq 3.953 eval_avg 3.468 eval_min 2.875 per 2.875
model_ix 91: policy [1 1 2 3 2 3] score 0.500 initialq 3.936 eval_avg 3.470 eval_min 2.865 per 2.871
model_ix 36: policy [1 1 2 3 2 3] score 1.000 initialq 3.949 eval_avg 3.469 eval_min 2.868 per 2.870
model_ix 23: policy [2 1 1 3 3 3] score 0.750 initialq 3.942 eval_avg 3.449 eval_min 2.813 per 2.854
model_ix 80: policy [2 1 3 1 3 3] score 0.755 initialq 3.963 eval_avg 3.583 eval_min 2.843 per 2.845
model_ix 85: policy [2 1 1 3 3 3] score 0.750 initialq 3.947 eval_avg 3.451 eval_min 2.800 per 2.844
model_ix 89: policy [1 2 3 1 3 3] score 0.750 initialq 3.950 eval_avg 3.606 eval_min 2.785 per 2.833
model_ix 41: policy [1 2 3 1 3 3] score 0.750 initialq 3.955 eval_avg 3.606 eval_min 2.781 per 2.829
model_ix 39: policy [1 3 1 3 0 2] score 0.586 initialq 3.929 eval_avg 3.743 eval_min 2.810 per 2.821
model_ix  1: policy [1 2 1 3 2 3] score 0.953 initialq 3.961 eval_avg 3.531 eval_min 2.752 per 2.780
model_ix 73: policy [1 2 1 3 3 3] score 0.747 initialq 3.955 eval_avg 3.464 eval_min 2.721 per 2.730
model_ix 81: policy [1 2 1 3 3 3] score 0.690 initialq 3.952 eval_avg 3.464 eval_min 2.706 per 2.719
model_ix 34: policy [1 2 1 3 3 0] score 0.750 initialq 3.948 eval_avg 3.366 eval_min 2.685 per 2.713
model_ix 43: policy [1 2 1 3 3 3] score 0.742 initialq 3.939 eval_avg 3.464 eval_min 2.695 per 2.712
model_ix 62: policy [1 2 1 3 3 3] score 0.750 initialq 3.950 eval_avg 3.463 eval_min 2.691 per 2.708
model_ix 76: policy [1 3 2 3 1 3] score 0.703 initialq 3.929 eval_avg 3.487 eval_min 2.656 per 2.690
model_ix 70: policy [1 1 3 0 3 2] score 0.750 initialq 3.937 eval_avg 3.705 eval_min 2.576 per 2.681
model_ix 35: policy [1 1 3 0 3 2] score 0.742 initialq 3.930 eval_avg 3.706 eval_min 2.569 per 2.677
model_ix 58: policy [1 2 3 1 1 3] score 0.500 initialq 3.950 eval_avg 3.674 eval_min 2.619 per 2.665
model_ix 96: policy [1 1 3 0 3 2] score 0.747 initialq 3.919 eval_avg 3.705 eval_min 2.546 per 2.660
model_ix 83: policy [1 1 3 0 3 2] score 0.500 initialq 3.925 eval_avg 3.705 eval_min 2.540 per 2.655
model_ix 19: policy [1 2 1 3 0 2] score 0.750 initialq 3.955 eval_avg 3.292 eval_min 2.640 per 2.646
model_ix 82: policy [1 2 1 3 2 2] score 0.750 initialq 3.956 eval_avg 3.256 eval_min 2.627 per 2.629
model_ix 98: policy [1 2 1 3 2 2] score 0.750 initialq 3.923 eval_avg 3.256 eval_min 2.620 per 2.623
model_ix 37: policy [1 1 3 3 3 2] score 0.750 initialq 3.943 eval_avg 3.721 eval_min 2.434 per 2.586
model_ix 32: policy [1 2 1 3 1 3] score 0.750 initialq 3.945 eval_avg 3.526 eval_min 2.572 per 2.583
model_ix 87: policy [1 1 3 3 3 2] score 0.750 initialq 3.941 eval_avg 3.723 eval_min 2.428 per 2.583
model_ix 95: policy [1 1 3 3 3 2] score 0.727 initialq 3.899 eval_avg 3.720 eval_min 2.419 per 2.576
model_ix 74: policy [1 1 3 3 3 2] score 0.719 initialq 3.921 eval_avg 3.721 eval_min 2.417 per 2.574
model_ix 77: policy [1 1 3 3 3 2] score 0.750 initialq 3.928 eval_avg 3.722 eval_min 2.413 per 2.570
model_ix 54: policy [1 1 3 3 3 2] score 0.591 initialq 3.942 eval_avg 3.720 eval_min 2.409 per 2.568
model_ix 79: policy [2 1 1 0 1 3] score 0.534 initialq 3.950 eval_avg 3.584 eval_min 2.540 per 2.549
model_ix 72: policy [2 1 1 3 1 3] score 0.750 initialq 3.949 eval_avg 3.517 eval_min 2.424 per 2.516
model_ix 38: policy [1 1 3 3 2 3] score 0.750 initialq 3.914 eval_avg 3.672 eval_min 2.324 per 2.497
model_ix 50: policy [1 1 3 2 3 3] score 0.880 initialq 3.958 eval_avg 3.487 eval_min 2.472 per 2.495
model_ix 51: policy [1 1 3 3 2 3] score 0.750 initialq 3.932 eval_avg 3.672 eval_min 2.321 per 2.493
model_ix 67: policy [1 1 3 3 2 3] score 0.750 initialq 3.932 eval_avg 3.673 eval_min 2.264 per 2.452
model_ix 63: policy [2 1 1 3 0 3] score 0.750 initialq 3.942 eval_avg 3.412 eval_min 2.341 per 2.447
model_ix 28: policy [1 1 3 1 2 1] score 0.750 initialq 3.930 eval_avg 3.320 eval_min 2.326 per 2.426
model_ix  8: policy [1 1 2 1 2 3] score 0.500 initialq 3.928 eval_avg 3.493 eval_min 2.386 per 2.386
model_ix  3: policy [2 1 1 1 2 3] score 0.750 initialq 3.917 eval_avg 3.508 eval_min 2.311 per 2.355
model_ix 22: policy [1 1 3 1 2 3] score 0.750 initialq 3.941 eval_avg 3.690 eval_min 2.300 per 2.348
model_ix 15: policy [1 2 1 3 3 1] score 0.740 initialq 3.943 eval_avg 3.436 eval_min 2.190 per 2.333
model_ix 25: policy [1 1 3 1 0 2] score 0.500 initialq 3.930 eval_avg 3.445 eval_min 2.317 per 2.331
model_ix 71: policy [1 2 1 3 3 1] score 0.750 initialq 3.940 eval_avg 3.433 eval_min 2.189 per 2.330
model_ix  4: policy [1 2 1 3 3 1] score 0.747 initialq 3.951 eval_avg 3.433 eval_min 2.185 per 2.326
model_ix 97: policy [1 1 3 1 2 3] score 0.750 initialq 3.940 eval_avg 3.690 eval_min 2.260 per 2.324
model_ix 86: policy [1 2 1 1 3 2] score 0.750 initialq 3.951 eval_avg 3.580 eval_min 2.184 per 2.322
model_ix 78: policy [1 1 3 1 2 3] score 0.750 initialq 3.938 eval_avg 3.688 eval_min 2.275 per 2.315
model_ix 17: policy [1 1 3 0 0 2] score 0.503 initialq 3.932 eval_avg 3.452 eval_min 2.258 per 2.267
model_ix 10: policy [1 1 3 0 1 2] score 0.500 initialq 3.932 eval_avg 3.481 eval_min 2.232 per 2.265
model_ix 94: policy [1 2 1 3 0 0] score 0.500 initialq 3.968 eval_avg 3.106 eval_min 2.260 per 2.262
model_ix 56: policy [1 2 1 1 0 3] score 0.750 initialq 3.943 eval_avg 3.444 eval_min 2.181 per 2.247
model_ix 27: policy [1 1 3 3 0 2] score 0.669 initialq 3.914 eval_avg 3.634 eval_min 2.168 per 2.241
model_ix 69: policy [1 2 1 1 3 0] score 0.599 initialq 3.948 eval_avg 3.451 eval_min 2.140 per 2.234
model_ix 47: policy [1 1 3 3 0 2] score 0.599 initialq 3.943 eval_avg 3.638 eval_min 2.168 per 2.231
model_ix 99: policy [1 2 1 1 3 0] score 0.638 initialq 3.951 eval_avg 3.451 eval_min 2.133 per 2.231
model_ix 75: policy [1 1 3 3 0 2] score 0.714 initialq 3.939 eval_avg 3.637 eval_min 2.161 per 2.229
model_ix 33: policy [1 1 3 3 0 2] score 0.750 initialq 3.897 eval_avg 3.635 eval_min 2.160 per 2.219
model_ix 30: policy [1 1 3 3 0 2] score 0.615 initialq 3.938 eval_avg 3.636 eval_min 2.148 per 2.219
model_ix 31: policy [1 1 3 3 0 2] score 0.612 initialq 3.928 eval_avg 3.634 eval_min 2.135 per 2.199
model_ix 24: policy [1 2 1 1 1 3] score 0.539 initialq 3.828 eval_avg 3.492 eval_min 2.192 per 2.197
model_ix 21: policy [1 1 3 3 1 2] score 0.750 initialq 3.938 eval_avg 3.580 eval_min 2.111 per 2.157
model_ix 13: policy [1 1 3 2 2 3] score 1.000 initialq 3.941 eval_avg 3.465 eval_min 2.066 per 2.141
model_ix 90: policy [1 2 3 3 1 3] score 0.526 initialq 3.947 eval_avg 3.367 eval_min 2.138 per 2.141
model_ix  2: policy [1 2 3 3 1 3] score 0.500 initialq 3.944 eval_avg 3.368 eval_min 2.134 per 2.138
model_ix 18: policy [1 2 3 3 1 3] score 0.596 initialq 3.919 eval_avg 3.367 eval_min 2.138 per 2.138
model_ix 12: policy [1 2 3 3 1 3] score 0.510 initialq 3.947 eval_avg 3.367 eval_min 2.138 per 2.138
model_ix 88: policy [1 1 3 2 2 3] score 0.880 initialq 3.956 eval_avg 3.463 eval_min 2.062 per 2.137
model_ix 65: policy [1 2 3 3 1 3] score 0.500 initialq 3.939 eval_avg 3.367 eval_min 2.135 per 2.136
model_ix 93: policy [1 1 3 2 2 3] score 0.997 initialq 3.941 eval_avg 3.464 eval_min 2.062 per 2.136
model_ix 48: policy [1 1 3 3 2 2] score 0.911 initialq 3.941 eval_avg 3.580 eval_min 2.066 per 2.129
model_ix 49: policy [1 2 1 1 3 1] score 0.526 initialq 3.931 eval_avg 3.463 eval_min 2.028 per 2.128
model_ix  5: policy [1 1 3 2 2 3] score 0.602 initialq 3.929 eval_avg 3.465 eval_min 2.061 per 2.125
model_ix 66: policy [1 1 3 3 2 2] score 0.833 initialq 3.953 eval_avg 3.581 eval_min 2.061 per 2.125
model_ix 53: policy [1 1 3 3 2 2] score 0.964 initialq 3.945 eval_avg 3.578 eval_min 2.065 per 2.124
model_ix 40: policy [1 1 3 3 2 2] score 0.773 initialq 3.923 eval_avg 3.581 eval_min 2.058 per 2.122
model_ix 68: policy [1 2 1 1 3 1] score 0.500 initialq 3.945 eval_avg 3.463 eval_min 2.028 per 2.122
model_ix 59: policy [1 2 1 1 3 3] score 0.750 initialq 3.928 eval_avg 3.534 eval_min 2.096 per 2.117
model_ix 57: policy [1 1 3 2 1 0] score 0.500 initialq 3.947 eval_avg 2.995 eval_min 2.087 per 2.117
model_ix 92: policy [1 2 1 1 0 1] score 0.500 initialq 3.916 eval_avg 2.914 eval_min 2.040 per 2.043

In [28]:
'''
Now let's look at the extended version with all 100 models for with dropout.
'''
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')
data13 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runD.npz')

data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')
data23 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real1-runD.npz')

data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')
data33 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real0-runD.npz')

data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runC.npz')
data43 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runD.npz')

data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')
data53 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runD.npz')

data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')
data63 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runD.npz')

data71 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runC.npz')
data73 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rme-rtype1-trajectories500-runD.npz')

vloss = np.concatenate([data11['vloss'],data12['vloss'],data13['vloss']])
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0],data23['scores'][:,0]])
rewards = np.concatenate([data41['rewards'][:,0],data42['rewards'][:,0],data43['rewards'][:,0]])
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0],data53['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts'],data63['opts']])[:,0,:]
qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:],data63['qs'][:,0,:,:]])

# each row is a policy
evals = np.vstack([data71['evals'],data72['evals'],data73['evals']]).T
#six.print_(evals)

eval_avg = np.mean(evals,axis=1)
sorted_avg_ix = np.flip(np.argsort(eval_avg), 0)

eval_min = np.min(evals,axis=1)
sorted_min_ix = np.flip(np.argsort(eval_min), 0)

eval_per = np.percentile(evals,0.25,axis=1)
sorted_per_ix = np.flip(np.argsort(eval_per), 0)

#six.print_(sorted_avg_ix)
#six.print_(sorted_min_ix)

for r in six.moves.range(evals.shape[0]):
    ix = sorted_per_ix[r]
    six.print_('model_ix {:2d}: policy {} score {:.3f} initialq {:.3f} eval_avg {:.3f} eval_min {:.3f} per {:.3f}'.format(
        ix, opts[ix,:], scores[ix], initialq[ix], eval_avg[ix], eval_min[ix], eval_per[ix]))


model_ix 73: policy [1 2 1 1 3 2] score 0.853 initialq 2.653 eval_avg 2.752 eval_min 2.411 per 2.413
model_ix 71: policy [1 2 1 1 3 2] score 0.770 initialq 3.019 eval_avg 2.750 eval_min 2.411 per 2.412
model_ix 75: policy [1 2 1 1 2 1] score 0.750 initialq 2.544 eval_avg 2.511 eval_min 2.404 per 2.408
model_ix 52: policy [1 2 1 1 2 1] score 0.750 initialq 2.410 eval_avg 2.511 eval_min 2.403 per 2.408
model_ix 29: policy [2 1 1 1 3 2] score 0.794 initialq 3.015 eval_avg 2.730 eval_min 2.391 per 2.395
model_ix 85: policy [2 1 1 1 3 2] score 0.750 initialq 2.902 eval_avg 2.730 eval_min 2.389 per 2.392
model_ix 57: policy [2 1 1 1 3 2] score 0.750 initialq 3.389 eval_avg 2.730 eval_min 2.386 per 2.391
model_ix 95: policy [2 1 1 1 3 2] score 0.712 initialq 2.764 eval_avg 2.729 eval_min 2.381 per 2.387
model_ix 99: policy [1 1 1 3 3 2] score 0.500 initialq 2.840 eval_avg 2.902 eval_min 2.365 per 2.371
model_ix 46: policy [1 1 1 3 3 2] score 0.750 initialq 3.015 eval_avg 2.905 eval_min 2.365 per 2.369
model_ix 66: policy [1 1 1 3 3 2] score 0.750 initialq 3.001 eval_avg 2.904 eval_min 2.360 per 2.368
model_ix  9: policy [1 2 1 1 3 3] score 0.750 initialq 2.718 eval_avg 2.715 eval_min 2.365 per 2.366
model_ix 23: policy [1 2 1 1 3 3] score 0.750 initialq 2.674 eval_avg 2.714 eval_min 2.365 per 2.365
model_ix 91: policy [1 2 1 1 3 3] score 0.750 initialq 2.872 eval_avg 2.714 eval_min 2.364 per 2.364
model_ix 82: policy [1 2 1 1 3 3] score 0.750 initialq 2.623 eval_avg 2.715 eval_min 2.363 per 2.364
model_ix  7: policy [1 2 2 1 3 3] score 1.000 initialq 2.565 eval_avg 2.679 eval_min 2.355 per 2.357
model_ix 39: policy [2 1 3 1 3 0] score 0.500 initialq 2.733 eval_avg 2.633 eval_min 2.340 per 2.345
model_ix  5: policy [1 1 3 3 1 2] score 0.849 initialq 2.969 eval_avg 2.734 eval_min 2.337 per 2.339
model_ix  1: policy [1 1 1 3 0 2] score 0.500 initialq 3.117 eval_avg 2.761 eval_min 2.332 per 2.334
model_ix 78: policy [1 2 1 1 3 0] score 0.500 initialq 2.984 eval_avg 2.705 eval_min 2.321 per 2.330
model_ix 86: policy [1 2 1 0 1 3] score 0.536 initialq 2.576 eval_avg 2.657 eval_min 2.326 per 2.328
model_ix 54: policy [1 2 2 1 3 0] score 0.750 initialq 2.412 eval_avg 2.608 eval_min 2.317 per 2.328
model_ix 97: policy [1 2 1 1 3 0] score 0.504 initialq 3.168 eval_avg 2.704 eval_min 2.314 per 2.325
model_ix 77: policy [1 2 1 1 3 0] score 0.539 initialq 3.106 eval_avg 2.705 eval_min 2.299 per 2.313
model_ix 40: policy [1 1 1 3 2 0] score 0.518 initialq 3.059 eval_avg 2.794 eval_min 2.304 per 2.311
model_ix 92: policy [2 1 1 1 3 3] score 0.736 initialq 2.553 eval_avg 2.697 eval_min 2.296 per 2.311
model_ix 94: policy [1 2 1 2 1 1] score 0.750 initialq 2.436 eval_avg 2.482 eval_min 2.291 per 2.311
model_ix 60: policy [1 2 0 0 0 0] score 0.250 initialq 2.491 eval_avg 2.444 eval_min 2.305 per 2.308
model_ix 89: policy [1 1 3 1 0 2] score 0.500 initialq 3.122 eval_avg 2.603 eval_min 2.296 per 2.306
model_ix  4: policy [1 1 1 3 2 0] score 0.500 initialq 2.927 eval_avg 2.795 eval_min 2.296 per 2.302
model_ix 43: policy [1 1 3 3 2 3] score 0.750 initialq 3.083 eval_avg 2.747 eval_min 2.299 per 2.301
model_ix  0: policy [1 1 3 2 1 3] score 0.750 initialq 2.827 eval_avg 2.763 eval_min 2.269 per 2.294
model_ix 65: policy [1 1 1 3 2 1] score 0.513 initialq 2.918 eval_avg 2.775 eval_min 2.258 per 2.293
model_ix 38: policy [1 1 1 3 2 1] score 0.500 initialq 2.942 eval_avg 2.773 eval_min 2.257 per 2.292
model_ix 21: policy [1 1 1 3 2 1] score 0.500 initialq 3.169 eval_avg 2.774 eval_min 2.256 per 2.291
model_ix 25: policy [1 1 3 1 2 1] score 0.500 initialq 3.007 eval_avg 2.588 eval_min 2.262 per 2.291
model_ix 49: policy [1 1 1 1 3 2] score 0.526 initialq 2.774 eval_avg 2.858 eval_min 2.270 per 2.291
model_ix  8: policy [1 1 3 3 2 3] score 0.750 initialq 3.221 eval_avg 2.748 eval_min 2.281 per 2.288
model_ix 83: policy [1 1 2 3 3 2] score 0.988 initialq 3.168 eval_avg 2.524 eval_min 2.281 per 2.282
model_ix 10: policy [1 1 3 3 2 3] score 0.750 initialq 3.126 eval_avg 2.747 eval_min 2.273 per 2.281
model_ix 36: policy [1 1 2 3 3 2] score 1.000 initialq 3.325 eval_avg 2.524 eval_min 2.273 per 2.274
model_ix 34: policy [1 1 3 3 2 3] score 0.755 initialq 3.044 eval_avg 2.747 eval_min 2.260 per 2.267
model_ix 81: policy [1 1 3 3 0 2] score 0.750 initialq 3.204 eval_avg 2.695 eval_min 2.243 per 2.256
model_ix 35: policy [1 2 1 3 3 0] score 0.750 initialq 2.993 eval_avg 2.643 eval_min 2.246 per 2.249
model_ix 61: policy [1 1 3 3 0 2] score 0.750 initialq 3.156 eval_avg 2.693 eval_min 2.229 per 2.246
model_ix 33: policy [1 1 3 3 2 0] score 1.000 initialq 2.954 eval_avg 2.755 eval_min 2.237 per 2.244
model_ix 30: policy [1 1 3 3 2 0] score 0.750 initialq 2.903 eval_avg 2.754 eval_min 2.237 per 2.243
model_ix 50: policy [1 1 3 3 2 0] score 0.750 initialq 3.154 eval_avg 2.756 eval_min 2.232 per 2.241
model_ix 48: policy [1 1 3 3 2 0] score 0.753 initialq 3.067 eval_avg 2.753 eval_min 2.231 per 2.239
model_ix 17: policy [1 1 3 3 2 0] score 0.750 initialq 3.432 eval_avg 2.758 eval_min 2.236 per 2.238
model_ix 11: policy [1 1 3 3 2 0] score 0.750 initialq 3.151 eval_avg 2.757 eval_min 2.228 per 2.233
model_ix 12: policy [1 1 3 3 2 0] score 0.742 initialq 3.262 eval_avg 2.756 eval_min 2.219 per 2.229
model_ix 84: policy [1 1 3 3 2 0] score 0.751 initialq 2.909 eval_avg 2.752 eval_min 2.218 per 2.227
model_ix 72: policy [2 1 1 3 3 2] score 0.991 initialq 2.931 eval_avg 2.680 eval_min 2.218 per 2.226
model_ix 27: policy [2 1 1 3 3 2] score 1.000 initialq 3.183 eval_avg 2.681 eval_min 2.200 per 2.216
model_ix 28: policy [2 1 1 3 3 2] score 0.500 initialq 2.900 eval_avg 2.682 eval_min 2.192 per 2.206
model_ix 53: policy [1 2 1 3 3 2] score 1.000 initialq 3.328 eval_avg 2.693 eval_min 2.181 per 2.201
model_ix 69: policy [1 2 1 3 3 2] score 0.999 initialq 3.196 eval_avg 2.691 eval_min 2.178 per 2.196
model_ix 22: policy [1 2 1 3 3 2] score 0.888 initialq 2.830 eval_avg 2.691 eval_min 2.192 per 2.194
model_ix  3: policy [1 2 1 3 3 2] score 0.828 initialq 2.421 eval_avg 2.689 eval_min 2.177 per 2.192
model_ix 44: policy [1 2 1 3 3 2] score 1.000 initialq 3.212 eval_avg 2.691 eval_min 2.171 per 2.191
model_ix 13: policy [1 2 1 3 3 2] score 1.000 initialq 3.264 eval_avg 2.691 eval_min 2.173 per 2.186
model_ix 79: policy [1 2 1 3 3 2] score 0.999 initialq 3.163 eval_avg 2.692 eval_min 2.172 per 2.185
model_ix 56: policy [1 2 1 3 3 2] score 1.000 initialq 3.118 eval_avg 2.693 eval_min 2.161 per 2.180
model_ix 51: policy [1 2 1 3 3 2] score 0.996 initialq 3.113 eval_avg 2.690 eval_min 2.172 per 2.179
model_ix 67: policy [1 2 1 3 3 2] score 1.000 initialq 3.251 eval_avg 2.688 eval_min 2.168 per 2.176
model_ix 20: policy [1 2 1 3 0 3] score 0.500 initialq 2.515 eval_avg 2.625 eval_min 2.124 per 2.172
model_ix 14: policy [1 1 3 3 2 1] score 0.750 initialq 3.113 eval_avg 2.767 eval_min 2.098 per 2.170
model_ix 42: policy [1 1 3 3 2 1] score 0.750 initialq 2.980 eval_avg 2.766 eval_min 2.096 per 2.169
model_ix 76: policy [1 1 3 3 2 1] score 0.750 initialq 2.841 eval_avg 2.767 eval_min 2.086 per 2.161
model_ix 64: policy [2 1 1 3 3 0] score 0.750 initialq 3.400 eval_avg 2.624 eval_min 2.124 per 2.157
model_ix 45: policy [2 1 0 1 1 0] score 0.646 initialq 2.483 eval_avg 2.451 eval_min 2.136 per 2.144
model_ix 70: policy [1 3 1 3 2 3] score 0.750 initialq 2.816 eval_avg 2.747 eval_min 2.046 per 2.070
model_ix 59: policy [1 2 1 1 3 1] score 0.500 initialq 2.795 eval_avg 2.585 eval_min 2.044 per 2.067
model_ix 41: policy [2 1 1 1 1 3] score 0.500 initialq 2.990 eval_avg 2.636 eval_min 2.029 per 2.054
model_ix 80: policy [1 1 3 3 2 2] score 1.000 initialq 3.118 eval_avg 2.653 eval_min 2.016 per 2.029
model_ix 93: policy [1 1 3 2 3 0] score 0.750 initialq 3.321 eval_avg 2.688 eval_min 1.946 per 2.029
model_ix 62: policy [1 1 3 3 2 2] score 1.000 initialq 3.223 eval_avg 2.655 eval_min 2.018 per 2.025
model_ix 16: policy [1 1 3 3 2 2] score 1.000 initialq 3.308 eval_avg 2.656 eval_min 2.007 per 2.023
model_ix 18: policy [1 1 3 3 2 2] score 1.000 initialq 3.245 eval_avg 2.656 eval_min 2.004 per 2.013
model_ix  6: policy [1 1 3 2 3 0] score 0.753 initialq 3.374 eval_avg 2.687 eval_min 1.908 per 1.997
model_ix  2: policy [1 1 3 2 3 2] score 1.000 initialq 3.316 eval_avg 2.681 eval_min 1.995 per 1.996
model_ix 90: policy [1 1 3 2 3 2] score 1.000 initialq 3.290 eval_avg 2.680 eval_min 1.975 per 1.988
model_ix 98: policy [1 1 3 2 3 2] score 0.996 initialq 2.932 eval_avg 2.681 eval_min 1.976 per 1.983
model_ix 32: policy [1 1 3 2 3 2] score 1.000 initialq 3.301 eval_avg 2.682 eval_min 1.968 per 1.973
model_ix 74: policy [1 1 3 2 3 2] score 1.000 initialq 3.221 eval_avg 2.685 eval_min 1.948 per 1.967
model_ix 55: policy [1 1 3 2 3 2] score 1.000 initialq 3.285 eval_avg 2.681 eval_min 1.953 per 1.967
model_ix 47: policy [1 1 3 2 3 2] score 1.000 initialq 3.249 eval_avg 2.682 eval_min 1.957 per 1.965
model_ix 63: policy [1 1 3 2 3 2] score 0.993 initialq 3.175 eval_avg 2.680 eval_min 1.950 per 1.961
model_ix 15: policy [1 1 3 2 3 2] score 0.961 initialq 2.981 eval_avg 2.680 eval_min 1.944 per 1.960
model_ix 19: policy [1 1 3 2 3 2] score 1.000 initialq 3.208 eval_avg 2.679 eval_min 1.951 per 1.959
model_ix 96: policy [1 1 3 2 3 2] score 1.000 initialq 3.143 eval_avg 2.679 eval_min 1.939 per 1.957
model_ix 24: policy [1 1 3 2 3 2] score 1.000 initialq 3.272 eval_avg 2.679 eval_min 1.936 per 1.957
model_ix 31: policy [1 1 3 2 3 2] score 1.000 initialq 3.259 eval_avg 2.681 eval_min 1.943 per 1.955
model_ix 88: policy [1 1 3 2 0 0] score 0.500 initialq 3.252 eval_avg 2.562 eval_min 1.922 per 1.954
model_ix 37: policy [1 1 3 2 3 2] score 0.966 initialq 3.289 eval_avg 2.681 eval_min 1.946 per 1.953
model_ix 58: policy [1 1 3 2 3 2] score 1.000 initialq 3.254 eval_avg 2.683 eval_min 1.936 per 1.952
model_ix 26: policy [1 2 1 1 1 0] score 0.500 initialq 2.462 eval_avg 2.450 eval_min 1.906 per 1.937
model_ix 68: policy [1 1 3 2 2 3] score 1.000 initialq 3.356 eval_avg 2.528 eval_min 1.850 per 1.852
model_ix 87: policy [1 1 3 2 2 3] score 1.000 initialq 3.160 eval_avg 2.530 eval_min 1.846 per 1.850

In [2]:
'''
Now let's look at proper RME with no dropout
'''

def get_ranks(sorted_indices):
    ranks = np.zeros(sorted_indices.shape,dtype=np.int)
    for i in six.moves.range(sorted_indices.shape[0]):
        ranks[sorted_indices[i]] = i+1
    return ranks

def array2str(arr):
    inner = ' '.join('{:.3f}'.format(x) for x in arr)
    return '[{}]'.format(inner)

data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data31 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')

data71 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories200-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories200-runB.npz')

# each row is a real environment
raw_evals = np.vstack([data71['evals'],data72['evals']]).T
eval_ixs = np.arange(raw_evals.shape[0])

vloss = np.concatenate([data11['vloss'],data12['vloss']])[:,-1]
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
behavior = np.concatenate([data31['rewards'][:,0],data32['rewards'][:,0]]) / 4.0
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])[:,0,:]
#qfuncs = np.vstack([data61['qs'][:,0,:,:],data62['qs'][:,0,:,:]])

def normalizeRME(raw_evals):
    '''
    Find the global mean, and then shift each row's mean to be the global mean.
    '''
    #six.print_(raw_evals)
    globalmean = np.mean(raw_evals)
    shifts = globalmean - np.mean(raw_evals, axis=0)
    new_evals = raw_evals + shifts[np.newaxis,:]
    #six.print_(globalmean)
    #six.print_(shifts)
    #six.print_(new_evals)
    return new_evals

def printmatrixs(es,ixs,scores,behavior,shift=False):
    if shift:
        temp_es = normalizeRME(es)
    else:
        temp_es = es
    for r in six.moves.range(ixs.shape[0]):
        ix = ixs[r]
        six.print_('policy model_ix {:2d}: score {:.3f} behavior {:3f} {}'.format(
            ix, scores[ix], behavior[ix], temp_es[r,:]))
        #six.print_('avg {:2d}: {:.3f} | min {:2d}: {:.3f} | per {:2d}: {:.3f}'.format(
        #    ranked_avg_ix[ix], eval_avg[ix], ranked_min_ix[ix], eval_min[ix], ranked_per_ix[ix], eval_per[ix]))
        #six.print_('std other {:2d}: {:.3f} | std own {:2d}: {:.3f}'.format(
        #    ranked_std_ix[ix], eval_std[ix], ranked_stdt_ix[ix], eval_stdt[ix]))    

def computemetric(es):
    # compute the metric
    
    # currently using average eval
    temp_es = normalizeRME(es)
    
    # ignore self predictions completely
    # unfortunately doesn't seem to make that big of a difference
    temp_es_other = temp_es * (1.0 - np.eye(temp_es.shape[0]))
    
    metric = np.mean(temp_es_other,axis=1)
    #metric = np.min(es,axis=1)
    #metric = np.percentile(es,0.5,axis=1)
    #metric = np.std(es,axis=0)
    
    return metric

# try removing some of them
def remove_worst(es,ixs):
    
    metric = computemetric(es)
    metricix = np.flip(np.argsort(metric), 0)
    #metricix = np.argsort(metric)
    worst_ix = metricix[-1]
    
    metric2 = computemetric(es)
    metrix2ix = np.flip(np.argsort(metric2), 0)
    
    best_ix = metrix2ix[0]
    if metrix2ix.shape[0] > 1:
        best_ix2 = metrix2ix[1]
    
    six.print_('Removing worst ix {:2d}: score {:.4f} behavior {:3f}'.format(
        ixs[worst_ix], scores[ixs[worst_ix]], behavior[ixs[worst_ix]]))
    six.print_(' = Current best ix {:2d}: score {:.4f} behavior {:3f}'.format(
        ixs[best_ix], scores[ixs[best_ix]], behavior[ixs[best_ix]]))
    if metrix2ix.shape[0] > 1:
        six.print_(' = Current 2nd best ix {:2d}: score {:.4f} behavior {:3f}'.format(
            ixs[best_ix2], scores[ixs[best_ix2]], behavior[ixs[best_ix2]]))
    mask = np.ones(es.shape[0],dtype=bool)
    mask[worst_ix] = False
    es = es[mask,:]
    es = es[:,mask]
    ixs = ixs[mask]
    return es, ixs

def analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq):
    # show initial average means
    #temp_es = normalizeRME(raw_evals)
    temp_es = raw_evals
    metric = np.mean(temp_es,axis=1)
    metricix = np.flip(np.argsort(metric), 0)
    six.print_('Initial models ordered by average eval (normalized): {}'.format(metricix))
    six.print_('Corresponding average evals: {}'.format(metric[metricix]))
    
    # look at correlation between behavior and scores
    figure()
    title('rtype 2')
    plot(behavior,scores,'.',color='#0000ff')
    xlabel('behavior')
    ylabel('scores')
    
    # initial matrix
    six.print_('Initial matrix limited to the top 6 models')
    top6 = metricix[:6]
    raw_evals2 = raw_evals[top6,:]
    raw_evals2 = raw_evals2[:,top6]
    eval_ixs2 = eval_ixs[top6]
    six.print_('Normalized:')
    printmatrixs(raw_evals2, eval_ixs2, scores, behavior, shift=True)
    six.print_('Original:')
    printmatrixs(raw_evals2, eval_ixs2, scores, behavior, shift=False)
    
    
    for i in six.moves.range(38):
        raw_evals, eval_ixs = remove_worst(raw_evals, eval_ixs)
        #if i > 35 and i < 39:
        #    printmatrixs(raw_evals, eval_ixs, scores)
    six.print_('Normalized:')
    printmatrixs(raw_evals, eval_ixs, scores, behavior, shift=True)
    six.print_('Original:')
    printmatrixs(raw_evals, eval_ixs, scores, behavior, shift=False)
    #six.print_('Raw evals shape {}'.format(raw_evals.shape))
    #six.print_('Raw evals ixs {}'.format(eval_ixs))
    num_models = raw_evals.shape[0]
    
    #sorted_score_eix = np.flip(np.argsort(scores[eval_ixs]), 0)
    #sorted_score_ix = eval_ixs[sorted_score_eix]
    #six.print_('Indices sorted by score {}'.format(sorted_score_ix))

    # use the top few vloss models to evaluate
    if False:
        sorted_vloss = vloss[sorted_score_ix,-1]
        sorted_vloss_ix = np.argsort(sorted_vloss)
        ranked_vloss_ix = get_ranks(sorted_vloss_ix)
        topmodels = sorted_vloss_ix[:10]
        raw_evals = raw_evals[:,topmodels]
        six.print_('Sorted Validation Loss {}'.format(sorted_vloss))
        six.print_('Sorted Validation Loss Model Indices {}'.format(sorted_vloss_ix))
        six.print_(ranked_vloss_ix)

    if False:
        eval_avg = np.mean(raw_evals,axis=1)
        sorted_avg_eix = np.flip(np.argsort(eval_avg), 0)
        sorted_avg_ix = eval_ixs[sorted_avg_eix]
        ranked_avg_eix = get_ranks(sorted_avg_eix)

        eval_min = np.min(raw_evals,axis=1)
        sorted_min_eix = np.flip(np.argsort(eval_min), 0)
        sorted_min_ix = eval_ixs[sorted_min_eix]
        ranked_min_eix = get_ranks(sorted_min_eix)

        eval_per = np.percentile(raw_evals,0.25,axis=1)
        sorted_per_eix = np.flip(np.argsort(eval_per), 0)
        sorted_per_ix = eval_ixs[sorted_per_eix]
        ranked_per_eix = get_ranks(sorted_per_eix)

        eval_max = np.max(raw_evals,axis=1)
        sorted_max_eix = np.flip(np.argsort(eval_max), 0)
        sorted_max_ix = eval_ixs[sorted_max_eix]
        ranked_max_eix = get_ranks(sorted_max_eix)

        six.print_('Sorted by avg eval')
        six.print_('ixs: {}'.format(array2str(sorted_avg_ix)))
        six.print_('scores: {}'.format(array2str(scores[sorted_avg_ix])))

        six.print_('Sorted by min eval')
        six.print_('scores: {}'.format(array2str(scores[sorted_min_ix])))

        six.print_('Sorted by 25% per eval')
        six.print_('scores: {}'.format(array2str(scores[sorted_per_ix])))

        six.print_('Sorted by max eval')
        six.print_('scores: {}'.format(array2str(scores[sorted_max_ix])))

        eval_std = np.std(raw_evals,axis=1)
        sorted_std_eix = np.argsort(eval_std)
        sorted_std_ix = eval_ixs[sorted_std_eix]
        ranked_std_eix = get_ranks(sorted_std_eix)

        six.print_('Sorted by smallest std of evals by other models')
        six.print_('scores: {}'.format(array2str(scores[sorted_std_ix])))

        eval_stdt = np.std(raw_evals,axis=0)
        sorted_stdt_eix = np.argsort(eval_stdt)
        sorted_stdt_ix = eval_ixs[sorted_stdt_eix]
        ranked_stdt_eix = get_ranks(sorted_stdt_eix)

        six.print_('Sorted by smallest std of own evals')
        six.print_('scores: {}'.format(array2str(scores[sorted_stdt_ix])))

analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq)


Initial models ordered by average eval (normalized): [16 19  4 23 14 13  1 26 34 15 20 12  2 38  5 18 36 25 28  7 35 32  6  8 24
 22  0 27 17 21 30  3 31 11 39 29 10 37 33  9]
Corresponding average evals: [ 0.97199326  0.97048718  0.96884563  0.94370891  0.94280578  0.93329766
  0.93306345  0.93205693  0.92177525  0.91790356  0.91190244  0.91176114
  0.91072734  0.90841993  0.90720764  0.90194236  0.90192156  0.89963918
  0.89870888  0.89561655  0.89306879  0.8910653   0.87720681  0.87638965
  0.87149877  0.87110995  0.86475839  0.83913973  0.83487522  0.81697656
  0.81488811  0.80385715  0.80328164  0.80012653  0.79659556  0.79556427
  0.79361316  0.78121458  0.78001273  0.67641271]
Initial matrix limited to the top 6 models
Normalized:
policy model_ix 16: score 0.977 behavior 0.012805 [ 0.97692005  0.96516786  1.01574539  0.99916927  1.0128447   0.95703695]
policy model_ix 19: score 0.750 behavior 0.016866 [ 0.97643041  0.98842572  1.01397261  0.99399241  1.02110679  0.97539216]
policy model_ix  4: score 0.747 behavior 0.013435 [ 0.97460066  0.95948558  1.01577788  0.997378    0.99971949  0.95924772]
policy model_ix 23: score 0.750 behavior 0.009969 [ 0.96261989  0.95935253  1.00783512  0.99943948  0.92050578  0.96784411]
policy model_ix 14: score 0.500 behavior 0.013280 [ 0.95135973  0.96567523  0.92001381  0.87422896  1.02029366  0.92743071]
policy model_ix 13: score 1.000 behavior 0.008202 [ 0.92526014  0.92908396  0.79384606  0.90298276  0.79272046  0.98023924]
Original:
policy model_ix 16: score 0.977 behavior 0.012805 [ 0.99709595  0.97391605  0.99691487  0.99670975  0.98876497  0.97348263]
policy model_ix 19: score 0.750 behavior 0.016866 [ 0.9966063   0.99717392  0.99514209  0.99153289  0.99702706  0.99183784]
policy model_ix  4: score 0.747 behavior 0.013435 [ 0.99477656  0.96823377  0.99694736  0.99491848  0.97563976  0.9756934 ]
policy model_ix 23: score 0.750 behavior 0.009969 [ 0.98279579  0.96810072  0.9890046   0.99697996  0.89642605  0.98428979]
policy model_ix 14: score 0.500 behavior 0.013280 [ 0.97153563  0.97442342  0.90118329  0.87176944  0.99621393  0.94387639]
policy model_ix 13: score 1.000 behavior 0.008202 [ 0.94543604  0.93783216  0.77501553  0.90052324  0.76864073  0.99668492]
Removing worst ix  9: score 0.7057 behavior 0.002620
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix 19: score 0.7500 behavior 0.016866
Removing worst ix 33: score 0.7500 behavior 0.001837
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix 19: score 0.7500 behavior 0.016866
Removing worst ix 37: score 0.7500 behavior 0.001846
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix 19: score 0.7500 behavior 0.016866
Removing worst ix 10: score 0.5000 behavior 0.003771
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix 19: score 0.7500 behavior 0.016866
Removing worst ix 39: score 0.5859 behavior 0.000540
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 31: score 0.6120 behavior 0.001880
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 29: score 0.5000 behavior 0.001586
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 11: score 0.7526 behavior 0.002437
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 30: score 0.6146 behavior 0.001319
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 21: score 0.7500 behavior 0.002410
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 17: score 0.5026 behavior 0.001041
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 27: score 0.6693 behavior 0.000617
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  3: score 0.7500 behavior 0.006661
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 22: score 0.7500 behavior 0.008103
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  8: score 0.5000 behavior 0.012952
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 24: score 0.5391 behavior 0.009562
 = Current best ix 16: score 0.9766 behavior 0.012805
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 35: score 0.7422 behavior 0.012201
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  6: score 0.9245 behavior 0.010662
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 28: score 0.7500 behavior 0.011487
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  0: score 0.9870 behavior 0.009493
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 36: score 1.0000 behavior 0.015878
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 32: score 0.7500 behavior 0.015894
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 38: score 0.7500 behavior 0.009354
 = Current best ix  4: score 0.7474 behavior 0.013435
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  5: score 0.6016 behavior 0.015276
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 12: score 0.5104 behavior 0.010335
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 20: score 0.7500 behavior 0.014925
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 25: score 0.5000 behavior 0.011381
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 34: score 0.7500 behavior 0.011260
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 18: score 0.5964 behavior 0.016038
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 15: score 0.7396 behavior 0.012586
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  2: score 0.5000 behavior 0.014225
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 13: score 1.0000 behavior 0.008202
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  7: score 0.7500 behavior 0.008849
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 14: score 0.5000 behavior 0.013280
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  1: score 0.9531 behavior 0.009575
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix  4: score 0.7474 behavior 0.013435
Removing worst ix 23: score 0.7500 behavior 0.009969
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix 26: score 0.8385 behavior 0.011578
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Removing worst ix  4: score 0.7474 behavior 0.013435
 = Current best ix 19: score 0.7500 behavior 0.016866
 = Current 2nd best ix 16: score 0.9766 behavior 0.012805
Normalized:
policy model_ix 16: score 0.977 behavior 0.012805 [ 0.99144288  0.97956912]
policy model_ix 19: score 0.750 behavior 0.016866 [ 0.99095323  1.00282699]
Original:
policy model_ix 16: score 0.977 behavior 0.012805 [ 0.99709595  0.97391605]
policy model_ix 19: score 0.750 behavior 0.016866 [ 0.9966063   0.99717392]

In [3]:
'''
Now let's look at proper RME with dropout
'''

def get_ranks(sorted_indices):
    ranks = np.zeros(sorted_indices.shape,dtype=np.int)
    for i in six.moves.range(sorted_indices.shape[0]):
        ranks[sorted_indices[i]] = i+1
    return ranks

data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz')

data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz')

data31 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz')
data32 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz')

data41 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runA.npz')
data42 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype2-trajectories400-runC.npz')

data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz')

data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz')

data71 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories100-runA.npz')
data72 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/rmeproper-rtype1-rollouts1000-trajectories100-runC.npz')

# each row is a real environment
raw_evals = np.vstack([data71['evals'],data72['evals']]).T
eval_ixs = np.arange(raw_evals.shape[0])

vloss = np.concatenate([data11['vloss'],data12['vloss']])[:,-1]
scores = np.concatenate([data21['scores'][:,0],data22['scores'][:,0]])
behavior = np.concatenate([data41['rewards'][:,0],data42['rewards'][:,0]]) / 4.0
initialq = np.concatenate([data51['qvals'][:,0],data52['qvals'][:,0]])
opts = np.vstack([data61['opts'],data62['opts']])[:,0,:]

analyzeRME(raw_evals, eval_ixs, scores, vloss, behavior, initialq)


Initial models ordered by average eval (normalized): [ 6 31 37 27 24  2 13 32 19 29 18 36 35 11  1 15 12 16 17 22 38 34 33 14  4
  8  0 10 28  5  9 21 30  7 23 25 20  3 26 39]
Corresponding average evals: [ 0.79046231  0.78443079  0.75787238  0.75163939  0.75045328  0.74879982
  0.74810535  0.74690064  0.73879944  0.73737043  0.73634734  0.73538011
  0.72511568  0.71326813  0.7105164   0.7085891   0.70802477  0.70394537
  0.69985053  0.69167164  0.68797904  0.68401038  0.68056144  0.67730573
  0.67719844  0.67629833  0.67355014  0.67313396  0.668639    0.66452524
  0.64444795  0.63478325  0.63420727  0.6299764   0.62566973  0.6219838
  0.61848848  0.61331204  0.61259509  0.59964631]
Initial matrix limited to the top 6 models
Normalized:
policy model_ix  6: score 0.753 behavior 0.003157 [ 0.88160566  0.84706621  0.85344805  0.84457142  0.84588378  0.84642003]
policy model_ix 31: score 1.000 behavior 0.003621 [ 0.80818227  0.82822605  0.82822366  0.83018309  0.82783115  0.82834948]
policy model_ix 37: score 0.966 behavior 0.004963 [ 0.86047666  0.83136503  0.85684807  0.83050135  0.82972335  0.82731821]
policy model_ix 27: score 1.000 behavior 0.004479 [ 0.81612238  0.8170752   0.81327963  0.82692792  0.82090894  0.81180547]
policy model_ix 24: score 1.000 behavior 0.003319 [ 0.82571681  0.82472971  0.81568117  0.82747932  0.82701644  0.82155598]
policy model_ix  2: score 1.000 behavior 0.003722 [ 0.80046204  0.84410362  0.82508524  0.83290272  0.84120217  0.85711666]
Original:
policy model_ix  6: score 0.753 behavior 0.003157 [ 0.85792593  0.85955294  0.83836259  0.85038139  0.85722227  0.85555003]
policy model_ix 31: score 1.000 behavior 0.003621 [ 0.78450254  0.84071279  0.8131382   0.83599307  0.83916963  0.83747948]
policy model_ix 37: score 0.966 behavior 0.004963 [ 0.83679693  0.84385177  0.84176261  0.83631133  0.84106184  0.83644821]
policy model_ix 27: score 1.000 behavior 0.004479 [ 0.79244265  0.82956194  0.79819417  0.83273789  0.83224742  0.82093547]
policy model_ix 24: score 1.000 behavior 0.003319 [ 0.80203708  0.83721644  0.8005957   0.8332893   0.83835492  0.83068599]
policy model_ix  2: score 1.000 behavior 0.003722 [ 0.77678231  0.85659036  0.80999978  0.83871269  0.85254065  0.86624666]
Removing worst ix 39: score 0.5000 behavior 0.001941
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 26: score 0.5000 behavior 0.003941
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  3: score 0.8281 behavior 0.001953
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 20: score 0.5000 behavior 0.001537
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 25: score 0.5000 behavior 0.001459
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 23: score 0.7500 behavior 0.001754
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  7: score 1.0000 behavior 0.002761
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 21: score 0.5000 behavior 0.001187
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 30: score 0.7500 behavior 0.001257
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  9: score 0.7500 behavior 0.002295
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 28: score 0.5000 behavior 0.002909
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  5: score 0.8490 behavior 0.005161
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 10: score 0.7500 behavior 0.003607
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 14: score 0.7500 behavior 0.003656
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  0: score 0.7500 behavior 0.003497
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  8: score 0.7500 behavior 0.002865
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  4: score 0.5000 behavior 0.003085
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 22: score 0.8880 behavior 0.003793
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 38: score 0.5000 behavior 0.002684
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 33: score 1.0000 behavior 0.003930
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 12: score 0.7422 behavior 0.003832
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 34: score 0.7552 behavior 0.001583
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 11: score 0.7500 behavior 0.004412
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 15: score 0.9609 behavior 0.003706
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix  1: score 0.5000 behavior 0.003829
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 35: score 0.7500 behavior 0.004050
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 13: score 1.0000 behavior 0.003605
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 29: score 0.7943 behavior 0.004145
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 31: score 1.0000 behavior 0.003621
Removing worst ix 36: score 1.0000 behavior 0.003425
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix  2: score 1.0000 behavior 0.003722
Removing worst ix 17: score 0.7500 behavior 0.001827
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix  2: score 1.0000 behavior 0.003722
Removing worst ix 19: score 1.0000 behavior 0.002238
 = Current best ix  2: score 1.0000 behavior 0.003722
 = Current 2nd best ix  6: score 0.7526 behavior 0.003157
Removing worst ix 27: score 1.0000 behavior 0.004479
 = Current best ix  2: score 1.0000 behavior 0.003722
 = Current 2nd best ix  6: score 0.7526 behavior 0.003157
Removing worst ix 18: score 1.0000 behavior 0.004083
 = Current best ix  2: score 1.0000 behavior 0.003722
 = Current 2nd best ix  6: score 0.7526 behavior 0.003157
Removing worst ix 24: score 1.0000 behavior 0.003319
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix  2: score 1.0000 behavior 0.003722
Removing worst ix 31: score 1.0000 behavior 0.003621
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix  2: score 1.0000 behavior 0.003722
Removing worst ix 32: score 1.0000 behavior 0.004487
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 16: score 1.0000 behavior 0.001579
Removing worst ix  2: score 1.0000 behavior 0.003722
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 16: score 1.0000 behavior 0.001579
Removing worst ix 16: score 1.0000 behavior 0.001579
 = Current best ix  6: score 0.7526 behavior 0.003157
 = Current 2nd best ix 37: score 0.9661 behavior 0.004963
Normalized:
policy model_ix  6: score 0.753 behavior 0.003157 [ 0.85427651  0.842012  ]
policy model_ix 37: score 0.966 behavior 0.004963 [ 0.83314751  0.84541202]
Original:
policy model_ix  6: score 0.753 behavior 0.003157 [ 0.85792593  0.83836259]
policy model_ix 37: score 0.966 behavior 0.004963 [ 0.83679693  0.84176261]

In [ ]: