In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import six
from matplotlib.pyplot import *

In [8]:
# Posttest score as a function of the MCTS rollout budget, one curve per
# reward type. The numbers are entered by hand (one value per rollout count).
figure()
title('MCTS with Different Reward Types')
xlabel('# rollouts')
ylabel('posttest score over 100 runs')

xs = np.array([50, 100, 200, 400, 1000])
densey = np.array([0.8645, 0.8072, 0.91927, 0.8411, 0.8333])
semisparsey = np.array([0.9296, 0.9843, 1, 1, 1])
sparsey = np.array([0.8567, 0.9401, 0.9348, 0.9505, 1])

# (values, line colour, legend label) -- drawn in this order.
reward_curves = (
    (densey, 'blue', 'dense'),
    (semisparsey, 'red', 'semisparse'),
    (sparsey, 'green', 'sparse'),
)
for curve_ys, curve_color, curve_label in reward_curves:
    plot(xs, curve_ys, 'o-', color=curve_color, label=curve_label)

legend(loc='center right')


Out[8]:
<matplotlib.legend.Legend at 0x7f8f4796a310>

In [5]:
# Validation loss per training epoch: mean over the rows of `vals` with a
# 1.96 * standard-error band (normal-approximation 95% interval of the mean),
# plus two horizontal reference levels used to eyeball a stopping epoch.
figure()
title('Losses with DKT+MCTS Horizon 5 Dropout=0.7')
xlabel('training epoch')
ylabel('val loss')

data = np.load('experiments/dropoutput1.npz')

# First slice along axis 0 of the stored array; what remains is 2-D and the
# statistics below reduce over its rows.
# NOTE(review): presumably one row per training run -- confirm.
vals = data['vals'][0,:,:]

sliceend = 20  # show at most this many leading epochs

# Derive the x axis from the data so it always has the same length as the
# sliced curves (it used to be hard-coded to range(20)).
xs = np.arange(vals.shape[1])[:sliceend]

# Standard error uses the actual number of rows instead of a hard-coded 10,
# matching how the other cells in this notebook compute their bands.
n_runs = vals.shape[0]
ys_val = np.mean(vals,axis=0)[:sliceend]
err_val = np.std(vals, axis=0)[:sliceend]*1.96/np.sqrt(n_runs)


plot(xs, ys_val, color='#ff00ff')
plot(xs, ys_val+err_val, color='#ffccff')
plot(xs, ys_val-err_val, color='#ffccff')

# Reference levels: 0.0097, and 5% of the way from 0.0097 up to 0.019.
# NOTE(review): presumably the converged and the initial loss -- confirm.
plot(xs, np.repeat(0.0097,xs.shape),color='blue')
plot(xs, np.repeat(0.0097+(0.019-0.0097)*0.05 ,xs.shape),color='blue')
# with 0.05 threshold, looks like epoch 9


Out[5]:
[<matplotlib.lines.Line2D at 0x7f006eab0710>]

In [9]:
# Three figures from experiments/earlystopping4.npz:
#   1. mean score per epoch with a 1.96 * standard-error band,
#   2. the validation-loss curve of row 5 only,
#   3. per-row sum of the first 20 validation losses against score column 6.
figure()
title('Scores with DKT+MCTS Horizon 5 Dropout=0.7')
xlabel('training epoch')
ylabel('score')

data = np.load('experiments/earlystopping4.npz')
run_scores = data['scores']

print(run_scores[:,:])

xs = data['eps'][0,:]
ys = run_scores.mean(axis=0)
err = run_scores.std(axis=0) * 1.96 / np.sqrt(run_scores.shape[0])

# Mean curve first, then the upper and lower edge of the band.
for band_ys, band_color in ((ys, '#0000ff'), (ys + err, '#ccccff'), (ys - err, '#ccccff')):
    plot(xs, band_ys, color=band_color)

figure()
title('Val Loss with DKT Horizon 5 Dropout=0.7')
xlabel('training epoch')
ylabel('val loss')

# The 5:6 slice keeps exactly one row, so the "mean" is that row itself.
single_run_vals = data['vals'][5:6,:]
ys = single_run_vals.mean(axis=0)
plot(range(ys.shape[0]), ys, color='#ff00ff')

figure()
title('DKT+MCTS Horizon 5 Dropout=0.7 Val Loss vs score')
xlabel('val loss')
ylabel('score')

xs = data['vals'][:,:20].sum(axis=1)
ys = run_scores[:,6]
plot(xs, ys, '.')


[[ 0.75        0.65885417  0.75        0.75        0.75        0.72135417
   0.5390625   0.67708333  0.50260417  0.56510417  0.63541667  0.64322917
   0.53125     0.51302083  0.5390625 ]
 [ 0.75        0.75        0.75        0.75        0.75        0.75        0.75
   0.75        0.75        0.75        0.75        0.70833333  0.6328125
   0.70572917  0.65625   ]
 [ 0.75        0.75        0.75        0.75        0.578125    0.29427083
   0.49479167  0.48958333  0.50260417  0.51822917  0.54947917  0.61979167
   0.74479167  0.58333333  0.75      ]
 [ 0.74739583  0.75        0.75        0.75        0.7265625   0.63020833
   0.5078125   0.51822917  0.51822917  0.49739583  0.5078125   0.55729167
   0.46614583  0.49739583  0.59895833]
 [ 0.75        0.5         0.5         0.65885417  0.54166667  0.50260417
   0.5         0.5         0.69791667  0.75        0.75        0.75        0.75
   0.55989583  0.5       ]
 [ 0.75        0.75        0.75        0.75        0.87760417  0.9296875
   0.9921875   0.97916667  0.67447917  0.734375    0.59114583  0.6171875
   0.70833333  0.75        0.75      ]
 [ 0.75        0.75        0.75        0.73177083  0.53645833  0.484375
   0.5         0.5         0.51302083  0.51822917  0.5625      0.625
   0.7421875   0.75        0.86458333]
 [ 0.75        0.75        0.75        0.7421875   0.6328125   0.6015625
   0.8046875   0.8984375   0.95052083  0.75        0.75        0.75        0.75
   0.75        0.75      ]
 [ 0.75        0.75        0.75520833  0.7421875   0.75        0.75        0.75
   0.75        0.69270833  0.50520833  0.55729167  0.52864583  0.5390625
   0.52864583  0.51041667]
 [ 0.75        0.75        0.75        0.55208333  0.6328125   0.56510417
   0.61197917  0.59895833  0.66666667  0.7421875   0.77864583  0.83854167
   0.8515625   0.85677083  0.86979167]]
Out[9]:
[<matplotlib.lines.Line2D at 0x7f39563fb190>]

In [23]:
# Two figures from experiments/earlystopping5.npz: mean score per epoch with
# a 1.96 * standard-error band, and the validation loss averaged over rows.
figure()
title('Scores with DKT+MCTS Horizon 5 Dropout=1.0')
xlabel('training epoch')
ylabel('score')

data = np.load('experiments/earlystopping5.npz')
run_scores = data['scores']

print(run_scores)

xs = data['eps'][0,:]
ys = run_scores.mean(axis=0)
err = run_scores.std(axis=0) * 1.96 / np.sqrt(run_scores.shape[0])

# Mean curve first, then the upper and lower edge of the band.
for band_ys, band_color in ((ys, '#0000ff'), (ys + err, '#ccccff'), (ys - err, '#ccccff')):
    plot(xs, band_ys, color=band_color)

figure()
title('Val Loss with DKT Horizon 5 Dropout=1.0')
xlabel('training epoch')
ylabel('val loss')

ys = data['vals'][:,:].mean(axis=0)
epoch_ix = range(ys.shape[0])
plot(epoch_ix, ys, color='#ff00ff')
#plot(range(ys.shape[0]), data['vals'][8,:], color='#0000ff')

# Disabled scatter of validation loss against score, kept for reference.
#figure()
#title('Val Loss vs score')
#xlabel('val loss')
#ylabel('score')

#xs = data['vals'][:,-3]
#ys = data['scores'][:,-2]
#plot(xs, ys, '.')


[[ 0.61458333  0.5         0.72135417  0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.515625    0.5       ]
 [ 0.51822917  0.69791667  0.7109375   0.62760417  0.73958333  0.71614583
   0.73177083  0.75        0.75        0.75        0.75        0.75        0.75
   0.75        0.75      ]
 [ 0.5         0.73958333  0.75        0.72916667  0.74479167  0.75        0.75
   0.75        0.65104167  0.63802083  0.6484375   0.67447917  0.69010417
   0.70052083  0.69791667]
 [ 0.51302083  0.65364583  0.53125     0.76302083  0.77864583  0.62239583
   0.75        0.75        0.75        0.75        0.75        0.75
   0.80208333  0.84895833  0.91666667]
 [ 0.51302083  0.5859375   0.74739583  0.73177083  0.97916667  0.75
   0.7578125   0.7578125   0.76302083  0.7890625   0.7734375   0.8046875
   0.8203125   0.78385417  0.77083333]
 [ 0.5         0.5         0.75520833  0.75        0.70833333  0.73697917
   0.73177083  0.75        0.75        0.75        0.74739583  0.7421875
   0.73697917  0.74479167  0.75      ]
 [ 0.5         0.5         0.55729167  0.5         0.5859375   0.59114583
   0.61197917  0.81770833  0.84635417  0.8046875   0.83072917  0.80989583
   0.80729167  0.80989583  0.80208333]
 [ 0.69010417  0.75        0.93489583  0.9375      0.8984375   0.84895833
   0.796875    0.734375    0.76822917  0.85416667  0.83333333  0.84114583
   0.80989583  0.83854167  0.8125    ]
 [ 0.75        0.52083333  0.83072917  0.82291667  0.84895833  0.9296875
   0.984375    0.92708333  0.90364583  0.88020833  0.91145833  0.921875
   0.90104167  0.90364583  0.94791667]
 [ 0.74739583  0.51041667  0.50260417  0.5         0.66927083  0.609375
   0.5         0.50260417  0.50260417  0.5         0.50260417  0.5         0.5
   0.50260417  0.51822917]]
Out[23]:
[<matplotlib.lines.Line2D at 0x7fc5350fc4d0>]

In [37]:
# Scores and validation losses pooled from earlystopping6.npz and
# earlystopping7.npz (rows of both files stacked). Each figure shows the mean
# with a 1.96 * standard-error band and overlays one highlighted row in red.
figure()
title('Scores with DKT+MCTS Horizon 6 Filtered Dropout=1.0')
xlabel('training epoch')
ylabel('score')

data1 = np.load('experiments/earlystopping6.npz')
data2 = np.load('experiments/earlystopping7.npz')

scores = np.vstack([data1['scores'], data2['scores']])
vals = np.vstack([data1['vals'], data2['vals']])

# Dump every row of scores so individual runs can be inspected by index.
for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in scores[r,:])))

# Row overlaid in red on both figures; named once so the two plots cannot
# drift apart.
highlight_run = 25

xs = data1['eps'][0,:]
ys = np.mean(scores[:,:], axis=0)
err = np.std(scores, axis=0)*1.96/np.sqrt(scores.shape[0])

ys2 = scores[highlight_run,:]

plot(xs, ys, color='#0000ff')
plot(xs, ys+err, color='#ccccff')
plot(xs, ys-err, color='#ccccff')

plot(xs, ys2, color='#ff0000')

figure()
title('Val Loss with DKT Horizon 6 Filtered Dropout=1.0')
xlabel('training epoch')
ylabel('val loss')

# Column 0 of vals is skipped for this figure.
val_curves = vals[:,1:]
ys = np.mean(val_curves, axis=0)
# The band is sized by the number of validation-loss rows (previously it
# borrowed the row count of `scores`).
err = np.std(val_curves, axis=0)*1.96/np.sqrt(vals.shape[0])
epoch_ix = range(ys.shape[0])
plot(epoch_ix, ys, color='#0000ff')
plot(epoch_ix, ys+err, color='#ccccff')
plot(epoch_ix, ys-err, color='#ccccff')
plot(epoch_ix, vals[highlight_run,1:], color='#ff0000')
#plot(range(ys.shape[0]), np.repeat(0.0015, ys.shape), color='#00ff00')
#plot(range(ys.shape[0]), np.repeat(0.00155, ys.shape), color='#00ff00')


six.print_(ys)


 0 [0.5000 0.7500 0.7500 0.7500 0.7500 0.7500 0.7474 0.7448]
 1 [0.7500 1.0000 0.7656 0.7500 0.7500 0.7526 0.7500 0.7500]
 2 [0.7188 0.5312 0.6901 0.6068 0.5391 0.5885 0.6120 0.6536]
 3 [0.7031 0.5000 0.2995 0.5521 0.9089 1.0000 1.0000 0.9792]
 4 [0.7318 0.6927 0.7500 0.5573 0.6068 0.6120 0.5365 0.5208]
 5 [0.4193 0.3125 0.4479 0.5130 0.9010 1.0000 0.7500 0.7500]
 6 [0.7500 0.5000 0.5078 0.6042 0.6458 0.6745 0.6146 0.6589]
 7 [0.5391 0.7500 0.7682 0.7552 0.7708 1.0000 0.9922 0.7500]
 8 [0.4505 0.6953 0.7500 0.9010 0.7578 0.7682 0.7839 0.8177]
 9 [0.7500 0.5938 0.6042 0.9271 0.6979 0.5599 0.5729 0.6068]
10 [0.6589 0.7188 0.9688 0.5781 0.5000 0.5078 0.6406 0.6458]
11 [0.7344 0.5807 0.7370 0.6354 0.6328 0.5156 0.7396 0.7500]
12 [0.6615 0.7474 1.0000 1.0000 0.5026 0.5000 0.5000 0.5000]
13 [0.5208 0.5547 0.7005 0.7031 0.7396 0.6641 0.6172 0.6198]
14 [0.5000 0.6354 0.6953 0.6484 0.9870 0.7500 0.7500 0.7500]
15 [0.7292 0.5000 0.5026 0.5078 0.5078 0.5104 0.5078 0.5130]
16 [0.6536 0.7161 0.6328 0.7500 0.7552 0.7552 0.7448 0.7474]
17 [0.5000 0.5000 0.5000 0.5000 0.5000 0.5052 0.7500 0.7370]
18 [0.5651 0.5208 0.7500 0.7552 0.7891 0.7708 0.7448 0.5625]
19 [0.5052 0.2786 0.6536 0.7500 0.7500 0.7500 0.7500 0.6562]
20 [0.7500 1.0000 0.9818 0.7500 0.7578 0.7552 0.7604 0.7526]
21 [0.3021 0.2500 0.3333 0.9870 0.7500 0.7500 0.7500 0.7500]
22 [0.5000 0.5000 0.5000 0.5000 0.5000 0.5000 0.5000 0.5000]
23 [0.5078 0.5026 0.4948 1.0000 1.0000 1.0000 1.0000 1.0000]
24 [0.6406 0.3516 0.3672 0.7240 0.7630 0.6146 0.5000 0.5000]
25 [0.6667 0.7604 0.9896 0.7500 0.7500 1.0000 1.0000 1.0000]
26 [0.6849 0.7656 1.0000 1.0000 1.0000 0.7812 0.7995 1.0000]
27 [0.5911 0.3958 0.5260 0.6693 0.7500 0.7500 0.7500 0.7500]
28 [0.7188 0.5182 0.8568 0.7656 0.7891 0.7031 0.5312 0.6875]
29 [0.5000 0.6510 0.5000 0.5000 0.5000 0.5000 0.5000 0.5000]
[ 0.00415228  0.00287496  0.00224688  0.00191711  0.00174167  0.00164383
  0.00158316  0.00155703  0.00154182  0.00152257  0.00151535  0.00149582
  0.00149273  0.00148459  0.00148421]

In [66]:
# Mean validation loss per epoch (1-based x axis) with a 1.96 * standard-error
# band, plus two horizontal guides: the estimated converged loss and that loss
# plus a small absolute tolerance.
data1 = np.load('experiments/dropoutput2.npz')

vals = data1['vals'][0,:,:]

#for r in six.moves.range(vals.shape[0]):
#    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in vals[r,:])))

figure()
title('Val Loss with DKT Horizon 6 Filtered Dropout=1.0')
xlabel('training epoch')
ylabel('val loss')

xs = np.arange(vals.shape[1]) + 1.0
ys = np.mean(vals, axis=0)
err = np.std(vals, axis=0)*1.96/np.sqrt(vals.shape[0])
plot(xs, ys, color='#0000ff')
plot(xs, ys+err, color='#ccccff')
plot(xs, ys-err, color='#ccccff')
#ylim(0.001, 0.002)

# Converged loss is estimated as the mean of the last two epochs.
val_limit = np.mean(ys[-2:])
sig_acc = 0.00001  # absolute tolerance added on top of val_limit
val_threshold = val_limit + sig_acc
six.print_('Val limit: {:.4}'.format(val_limit))
six.print_('Val threshold using sig dig {} is {:.4}'.format(sig_acc, val_threshold))
plot(xs, np.repeat(val_limit, xs.shape), color='#00ff00')
# Draw the same threshold that is printed above. This line used to reference
# `rel_acc`, which is not defined in this cell and fails on a fresh kernel.
plot(xs, np.repeat(val_threshold, xs.shape), color='#00ff00')
# looks like epoch 12 is when val loss has converged within 3 significant digits


Val limit: 0.001503
Val threshold using sig dig 1e-05 is 0.001513
Out[66]:
[<matplotlib.lines.Line2D at 0x7fc5332f4710>]

In [77]:
# Mean validation loss per epoch (1-based x axis) with a 1.96 * standard-error
# band, pooled from two files, plus two horizontal guides: the estimated
# converged loss and that loss plus a small absolute tolerance.
data1 = np.load('experiments/dropoutput3.npz')
data2 = np.load('experiments/earlystopping5.npz')

#vals = data1['vals'][0,:,:]
# Stack the rows of both files; the second file is cut to its first 20 columns
# so the column counts line up for vstack.
vals = np.vstack([data1['vals'][0,:,:], data2['vals'][:,:20]])

#for r in six.moves.range(vals.shape[0]):
#    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in vals[r,:])))

figure()
title('Val Loss with DKT Horizon 5 Dropout=1.0')
xlabel('training epoch')
ylabel('val loss')

xs = np.arange(vals.shape[1]) + 1.0
ys = np.mean(vals, axis=0)
err = np.std(vals, axis=0)*1.96/np.sqrt(vals.shape[0])
plot(xs, ys, color='#0000ff')
plot(xs, ys+err, color='#ccccff')
plot(xs, ys-err, color='#ccccff')
#ylim(0.001, 0.002)
#xlim(10,20)

# Converged loss is estimated as the mean of the last two epochs.
val_limit = np.mean(ys[-2:])
sig_acc = 0.00001  # absolute tolerance added on top of val_limit
val_threshold = val_limit + sig_acc
six.print_('Val limit: {:.4}'.format(val_limit))
six.print_('Val threshold using sig dig {} is {:.4}'.format(sig_acc, val_threshold))
plot(xs, np.repeat(val_limit, xs.shape), color='#00ff00')
# Draw the same threshold that is printed above. This line used to reference
# `rel_acc`, which is not defined in this cell and fails on a fresh kernel.
plot(xs, np.repeat(val_threshold, xs.shape), color='#00ff00')
# looks like epoch 14 is when val loss has converged within 3 significant digits


Val limit: 0.001756
Val threshold using sig dig 1e-05 is 0.001766
Out[77]:
[<matplotlib.lines.Line2D at 0x7fc532ae7610>]

In [83]:
# Mean validation loss per epoch (1-based x axis) with a 1.96 * standard-error
# band, zoomed in on the tail of the curve, plus two horizontal guides: the
# estimated converged loss and that loss plus a small absolute tolerance.
data1 = np.load('experiments/dropoutput4.npz')

vals = data1['vals'][0,:,:]

#for r in six.moves.range(vals.shape[0]):
#    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in vals[r,:])))

figure()
title('Val Loss with DKT Horizon 7 Filtered Dropout=1.0')
xlabel('training epoch')
ylabel('val loss')

xs = np.arange(vals.shape[1]) + 1.0
ys = np.mean(vals, axis=0)
err = np.std(vals, axis=0)*1.96/np.sqrt(vals.shape[0])
plot(xs, ys, color='#0000ff')
plot(xs, ys+err, color='#ccccff')
plot(xs, ys-err, color='#ccccff')
ylim(0.001, 0.0014)
xlim(8,14)

# Converged loss is estimated as the mean of the last two epochs.
val_limit = np.mean(ys[-2:])
sig_acc = 0.00001  # absolute tolerance added on top of val_limit
val_threshold = val_limit + sig_acc
six.print_('Val limit: {:.4}'.format(val_limit))
six.print_('Val threshold using sig dig {} is {:.4}'.format(sig_acc, val_threshold))
plot(xs, np.repeat(val_limit, xs.shape), color='#00ff00')
# Draw the same threshold that is printed above. This line used to reference
# `rel_acc`, which is not defined in this cell and fails on a fresh kernel.
plot(xs, np.repeat(val_threshold, xs.shape), color='#00ff00')
# looks like epoch 10 is when val loss has converged within 3 significant digits


Val limit: 0.001267
Val threshold using sig dig 1e-05 is 0.001277
Out[83]:
[<matplotlib.lines.Line2D at 0x7fc53269f5d0>]

In [40]:
# Pools earlystopping8.npz and earlystopping10.npz and draws two scatter
# plots of score column 0: against the stored 'eps' values, and against the
# per-row mean validation loss.
data1 = np.load('experiments/earlystopping8.npz')
data2 = np.load('experiments/earlystopping10.npz')
archives = (data1, data2)

vals = np.vstack([archive['vals'] for archive in archives])
val_avg = vals.mean(axis=1)

scores = np.concatenate([archive['scores'][:,0] for archive in archives])

#for r in six.moves.range(vals.shape[0]):
#    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in vals[r,:])))

#for r in six.moves.range(val_sums.shape[0]):
#    six.print_('{:2d} {:.5f} {:.4f}'.format(r, val_avg[r], scores[r]))

figure()
title('Scores with DKT+MCTS Horizon 5 Dropout=1.0 Epoch 14')
xlabel('training epoch')
ylabel('posttest score')

xs = np.concatenate([archive['eps'] for archive in archives])
ys = scores

plot(xs, ys, '.', color='#0000ff')

figure()
title('Horizon 5 Dropout=1.0 Epoch 14 Loss-Score')
xlabel('avg val loss')
ylabel('posttest score')
xs, ys = val_avg, scores
plot(xs, ys, '.', color='#0000ff')


Out[40]:
[<matplotlib.lines.Line2D at 0x7efd21997090>]

In [45]:
# Pools earlystopping9.npz and earlystopping11.npz and draws two scatter
# plots of score column 0: against the stored 'eps' values, and against the
# per-row sum of validation losses.
data1 = np.load('experiments/earlystopping9.npz')
data2 = np.load('experiments/earlystopping11.npz')
archives = (data1, data2)

vals = np.vstack([archive['vals'] for archive in archives])
val_sums = vals[:,:].sum(axis=1)
scores = np.concatenate([archive['scores'][:,0] for archive in archives])

#for r in six.moves.range(vals.shape[0]):
#    six.print_('{:2d} [{}]'.format(r, ' '.join('{:.4f}'.format(x) for x in vals[r,:])))

#for r in six.moves.range(val_sums.shape[0]):
#    six.print_('{:2d} {:.5f} {:.4f}'.format(r, val_sums[r], scores[r]))

figure()
title('Scores with DKT+MCTS Horizon 6 Filtered Dropout=1.0 Epoch 12')
xlabel('training epoch')
ylabel('posttest score')

xs = np.concatenate([archive['eps'] for archive in archives])
ys = scores

plot(xs, ys, '.', color='#0000ff')

figure()
title('Horizon 6 Filtered Dropout=1.0 Epoch 12 Loss-Score')
xlabel('sum of val loss')
ylabel('posttest score')
xs, ys = val_sums, scores
plot(xs, ys, '.', color='#0000ff')


Out[45]:
[<matplotlib.lines.Line2D at 0x7efd21ee89d0>]

In [9]:
# Prints column 0 of 'scores' and 'qs' side by side for every row of
# earlystopping12.npz, then scatters one against the other.
data1 = np.load('experiments/earlystopping12.npz')

vals = data1['vals']
val_sums = vals[:,:].sum(axis=1)
scores = data1['scores'][:,0]
qs = data1['qs'][:,0]

row_format = '{:2d} {:.4f} {:.4f}'
for r in six.moves.range(scores.shape[0]):
    six.print_(row_format.format(r, scores[r], qs[r]))

figure()
title('Horizon 5 Dropout=1.0 Epoch 14 q-value-Score')
xlabel('qs')
ylabel('posttest score')
xs, ys = qs, scores
plot(xs, ys, '.', color='#0000ff')


 0 0.6979 3.5845
 1 0.6380 3.0328
 2 1.0000 3.5176
 3 0.7500 3.1657
 4 0.5208 3.6017
 5 0.9062 3.0751
 6 0.7500 3.0099
 7 0.6849 3.5839
 8 0.7500 3.1730
 9 0.9036 3.4734
10 0.5000 3.0675
11 0.7500 3.3549
12 0.7500 3.3046
13 0.7500 3.2964
14 0.5000 3.5954
15 1.0000 3.3843
16 0.7500 3.3653
17 0.7500 2.9623
18 0.5938 2.9451
19 0.7500 3.2618
Out[9]:
[<matplotlib.lines.Line2D at 0x7f393ee6fa50>]

In [10]:
# Scatter of score column 0 against 'qs' column 0, pooled over
# earlystopping14.npz and earlystopping15.npz, zoomed to x in [3.9, 4.0].
data1 = np.load('experiments/earlystopping14.npz')
data2 = np.load('experiments/earlystopping15.npz')
archives = (data1, data2)

scores = np.concatenate([archive['scores'][:,0] for archive in archives])
qs = np.concatenate([archive['qs'][:,0] for archive in archives])

#for r in six.moves.range(scores.shape[0]):
#    six.print_('{:2d} {:.4f} {:.4f}'.format(r, scores[r], qs[r]))

figure()
title('Horizon 5 Dropout=1.0 Epoch 14 value-Score')
xlabel('qs')
ylabel('posttest score')
xs, ys = qs, scores
plot(xs, ys, '.', color='#0000ff')
xlim(3.9,4.0)


Out[10]:
(3.9, 4.0)

In [15]:
# Scatter of posttest score against the 'rewards' column read from the
# policies-* files, pooled over runs A and B of the dropout10 experiment.
#
# Every archive is opened through a context manager so its file handle is
# closed right after the needed column has been read, and the experiment
# directory is spelled out once instead of once per file.
exp_dir = 'experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/'


def first_column(stem, key, runs=('A', 'B')):
    """Return column 0 of array `key`, concatenated over the given runs.

    For each run R the file exp_dir + '<stem>-run<R>.npz' is opened, column 0
    of archive[key] is read, and the archive is closed again.

    Parameters:
        stem: file name without the '-run<R>.npz' suffix.
        key: name of the array inside each archive.
        runs: run letters, concatenated in the order given.

    Returns:
        1-D array holding the column of every run, back to back.
    """
    columns = []
    for run in runs:
        with np.load('{}{}-run{}.npz'.format(exp_dir, stem, run)) as archive:
            columns.append(archive[key][:,0])
    return np.concatenate(columns)


trueqvals = first_column('mcts-rtype1-rollouts3000-trajectories100-real1', 'qvals')
scores = first_column('mcts-rtype1-rollouts3000-trajectories100-real1', 'scores')

oldrewards = first_column('policy-rtype1-trajectories100', 'rewards')
rewards = first_column('policies-rtype1-trajectories400', 'rewards')

falseqvals = first_column('mcts-rtype1-rollouts1000-trajectories100-real0', 'qvals')

# Disabled per-row dump of the values above (the loop that wrapped it did
# nothing but `pass`, so it has been reduced to this comment):
#for r in six.moves.range(scores.shape[0]):
#    six.print_('{:2d}: score {:.3f} true qval {:.4f} false qvals {:.4f} rewards {:.4f}'.format(r, scores[r], trueqvals[r], falseqvals[r], rewards[r]))

figure()
title('Horizon 5 Dropout=1.0 Epoch 14 Score correlation')
xlabel('optimal policy reward prediction')
ylabel('posttest score')
plot(rewards, scores, '.')
#xlim(3.9,4.0)


Out[15]:
[<matplotlib.lines.Line2D at 0x7fe9a2c20990>]

In [9]:
# Scatter of score column 0 from the rollouts1000 files (y axis) against the
# rollouts3000 files (x axis), each pooled over runs A and B.
data1, data2 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts1000-trajectories100-real1-runA.npz',
    'experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts1000-trajectories100-real1-runB.npz',
)]

data3, data4 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz',
    'experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz',
)]

scores1 = np.concatenate([archive['scores'][:,0] for archive in (data1, data2)])
scores2 = np.concatenate([archive['scores'][:,0] for archive in (data3, data4)])

figure()
title('Horizon 5 Dropout=1.0 Epoch 14 Scores correlation')
xlabel('3000 scores')
ylabel('1000 scores')
plot(scores2, scores1, '.')
#xlim(3.9,4.0)


Out[9]:
[<matplotlib.lines.Line2D at 0x7fe9a2c0a510>]

In [47]:
'''
Trying to find a stopping point for horizon 5 dropout 0.9
'''
# Mean validation loss per epoch with a +/- one standard-error band, and two
# horizontal guides: the final mean loss and that value plus 0.0001.
data1 = np.load('experiments/test2_model_small-dropout9-shuffle0-data-test2-n100000-l5-random.pickle/stats-stopping.npz')

vloss = data1['vloss']

xs = list(six.moves.range(vloss.shape[1]))
ys_mean = vloss.mean(axis=0)
ys_err = vloss.std(axis=0) / np.sqrt(vloss.shape[0])

last_val = ys_mean[-1]
thresh = last_val + 0.0001
six.print_('thresh {:4f} last val {:4f}'.format(thresh, last_val))

figure()
title('Horizon 5 Dropout=0.9 Val Loss')
# Mean curve first, then the upper and lower edge of the band.
for band_ys, band_color in ((ys_mean, '#0000ff'), (ys_mean + ys_err, '#ccccff'), (ys_mean - ys_err, '#ccccff')):
    plot(xs, band_ys, color=band_color)
# Horizontal guides spanning the first to the last epoch index.
x_span = [xs[0], xs[-1]]
for level in (last_val, thresh):
    plot(x_span, [level, level], color='#00ff00')
xlabel('training epoch')
ylabel('val loss')
xlim(10,15)
ylim(0.0023,0.0025)
# 2 significant digits epoch index 13


thresh 0.002489 last val 0.002389
Out[47]:
(0.0023, 0.0025)

In [5]:
'''
Trying to find a stopping point for horizon 5 dropout 0.8
'''
# Mean validation loss per epoch with a +/- one standard-error band, and two
# horizontal guides: the final mean loss and that value plus 0.00001.
data1 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-stopping.npz')

vloss = data1['vloss']

xs = list(six.moves.range(vloss.shape[1]))
ys_mean = vloss.mean(axis=0)
ys_err = vloss.std(axis=0) / np.sqrt(vloss.shape[0])

last_val = ys_mean[-1]
thresh = last_val + 0.00001
six.print_('thresh {:4f} last val {:4f}'.format(thresh, last_val))

figure()
title('Horizon 5 Dropout=0.8 Val Loss')
# Mean curve first, then the upper and lower edge of the band.
for band_ys, band_color in ((ys_mean, '#0000ff'), (ys_mean + ys_err, '#ccccff'), (ys_mean - ys_err, '#ccccff')):
    plot(xs, band_ys, color=band_color)
# Horizontal guides spanning the first to the last epoch index.
x_span = [xs[0], xs[-1]]
for level in (last_val, thresh):
    plot(x_span, [level, level], color='#00ff00')
xlabel('training epoch')
ylabel('val loss')
#xlim(20,40)
ylim(0.0048,0.0050)
# 2 significant digits epoch index 23
# 3 significant digits looks around 60


thresh 0.004876 last val 0.004866
Out[5]:
(0.0048, 0.005)

In [13]:
'''
Checking correlations
'''
# Loads six groups of archives (three runs each: A, C, D), prints every model
# ordered by descending initial q-value together with its 'opts' row and the
# first six rows of its q-function, then scatters score against initial q.

# stats-* files ('vloss')
data11, data12, data13 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runD.npz',
)]

# mcts-* real1 files ('scores')
data21, data22, data23 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real1-runD.npz',
)]

# mcts-* real0 files (only referenced by the disabled falseqvals line below)
data31, data32, data33 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories400-real0-runD.npz',
)]

# policies-* files ('rewards')
data41, data42, data43 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runD.npz',
)]

# initialq-* files ('qvals')
data51, data52, data53 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runD.npz',
)]

# optpolicy-* files ('opts', 'qs')
data61, data62, data63 = [np.load(path) for path in (
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runC.npz',
    'experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runD.npz',
)]

vloss = np.concatenate([archive['vloss'] for archive in (data11, data12, data13)])
scores = np.concatenate([archive['scores'][:,0] for archive in (data21, data22, data23)])
#trueqvals = np.concatenate([data21['qvals'][:,0],data22['qvals'][:,0],data23['qvals'][:,0]])
#falseqvals = np.concatenate([data31['qvals'][:,0],data32['qvals'][:,0],data33['qvals'][:,0]])
rewards = np.concatenate([archive['rewards'][:,0] for archive in (data41, data42, data43)])
initialq = np.concatenate([archive['qvals'][:,0] for archive in (data51, data52, data53)])
opts = np.vstack([archive['opts'] for archive in (data61, data62, data63)])
qfuncs = np.vstack([archive['qs'][:,0,:,:] for archive in (data61, data62, data63)])

# Indices that order the models by initial q-value, largest first.
sorted_score_ix = np.argsort(initialq)[::-1]
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix,:]
sorted_qfuncs = qfuncs[sorted_score_ix,:,:]

for r in six.moves.range(scores.shape[0]):
    six.print_('{:2d}: score {:.3f} initialq {:.2f} opt {}'.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r,:]))
    for t in six.moves.range(6):
        step_qs = ' '.join('{:.2f}'.format(q) for q in sorted_qfuncs[r,t,:])
        six.print_('  step {} qfunc [ {} ]'.format(t, step_qs))

figure()
title('Horizon 5 Dropout=0.8 Epoch 23 Scores')
xlabel('initial qvals rollout 100000')
ylabel('posttest scores')
plot(initialq, scores, '.',color='#0000ff')
#xlim(3.4,3.6)


 0: score 0.750 initialq 3.43 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.16 3.27 2.27 2.16 ]
  step 1 qfunc [ 2.36 3.37 2.37 2.37 ]
  step 2 qfunc [ 2.43 2.71 2.41 3.49 ]
  step 3 qfunc [ 3.01 2.89 2.89 3.51 ]
  step 4 qfunc [ 3.36 3.40 3.52 2.62 ]
  step 5 qfunc [ 3.54 3.46 3.53 3.13 ]
 1: score 0.750 initialq 3.40 opt [[2 1 1 3 3 0]]
  step 0 qfunc [ 2.26 2.61 3.31 2.29 ]
  step 1 qfunc [ 2.39 3.37 2.33 2.33 ]
  step 2 qfunc [ 2.58 3.40 2.58 2.65 ]
  step 3 qfunc [ 3.12 3.07 2.77 3.44 ]
  step 4 qfunc [ 3.44 3.41 2.76 3.45 ]
  step 5 qfunc [ 3.46 3.44 3.27 3.45 ]
 2: score 0.750 initialq 3.39 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.23 2.49 3.27 2.21 ]
  step 1 qfunc [ 2.35 3.34 2.29 2.41 ]
  step 2 qfunc [ 2.66 3.37 2.54 2.68 ]
  step 3 qfunc [ 3.05 3.42 2.59 3.25 ]
  step 4 qfunc [ 3.06 3.15 2.61 3.45 ]
  step 5 qfunc [ 3.45 3.19 3.46 3.27 ]
 3: score 0.753 initialq 3.37 opt [[1 1 3 2 3 0]]
  step 0 qfunc [ 2.28 3.20 2.38 2.23 ]
  step 1 qfunc [ 2.41 3.30 2.56 2.39 ]
  step 2 qfunc [ 2.41 2.58 2.64 3.38 ]
  step 3 qfunc [ 2.90 2.79 3.40 3.12 ]
  step 4 qfunc [ 2.97 3.31 2.47 3.42 ]
  step 5 qfunc [ 3.44 3.22 3.43 3.02 ]
 4: score 1.000 initialq 3.36 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.28 3.17 2.40 2.25 ]
  step 1 qfunc [ 2.43 3.27 2.60 2.43 ]
  step 2 qfunc [ 2.45 2.62 2.68 3.41 ]
  step 3 qfunc [ 2.89 2.85 3.43 2.85 ]
  step 4 qfunc [ 3.30 3.20 3.46 3.36 ]
  step 5 qfunc [ 3.00 3.32 2.86 3.49 ]
 5: score 1.000 initialq 3.33 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.25 3.18 2.47 2.33 ]
  step 1 qfunc [ 2.41 2.58 3.27 2.45 ]
  step 2 qfunc [ 2.58 3.31 2.58 2.60 ]
  step 3 qfunc [ 2.79 2.84 2.82 3.39 ]
  step 4 qfunc [ 3.21 2.93 3.34 3.41 ]
  step 5 qfunc [ 2.60 2.51 3.46 2.56 ]
 6: score 1.000 initialq 3.33 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.26 3.11 2.37 2.24 ]
  step 1 qfunc [ 2.38 3.26 2.55 2.44 ]
  step 2 qfunc [ 2.39 2.49 3.35 2.78 ]
  step 3 qfunc [ 2.50 2.63 2.54 3.38 ]
  step 4 qfunc [ 2.73 2.78 2.55 3.39 ]
  step 5 qfunc [ 2.84 2.59 3.40 2.61 ]
 7: score 0.750 initialq 3.32 opt [[1 1 3 2 3 0]]
  step 0 qfunc [ 2.28 3.14 2.36 2.17 ]
  step 1 qfunc [ 2.45 3.24 2.48 2.43 ]
  step 2 qfunc [ 2.50 2.84 2.56 3.29 ]
  step 3 qfunc [ 2.96 3.06 3.32 2.94 ]
  step 4 qfunc [ 3.22 3.28 2.90 3.34 ]
  step 5 qfunc [ 3.37 3.14 3.28 2.73 ]
 8: score 1.000 initialq 3.32 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.18 3.15 2.32 2.23 ]
  step 1 qfunc [ 2.33 3.24 2.47 2.37 ]
  step 2 qfunc [ 2.29 2.50 2.51 3.41 ]
  step 3 qfunc [ 2.95 2.59 3.44 2.93 ]
  step 4 qfunc [ 3.35 2.95 3.20 3.46 ]
  step 5 qfunc [ 3.03 2.76 3.47 2.62 ]
 9: score 1.000 initialq 3.31 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.22 3.12 2.32 2.18 ]
  step 1 qfunc [ 2.34 3.23 2.44 2.30 ]
  step 2 qfunc [ 2.44 2.47 2.45 3.37 ]
  step 3 qfunc [ 2.92 2.76 3.27 3.41 ]
  step 4 qfunc [ 3.02 2.73 3.43 2.65 ]
  step 5 qfunc [ 3.38 3.12 3.45 2.74 ]
10: score 1.000 initialq 3.30 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.19 3.10 2.34 2.21 ]
  step 1 qfunc [ 2.37 3.22 2.49 2.36 ]
  step 2 qfunc [ 2.33 2.49 2.54 3.35 ]
  step 3 qfunc [ 2.64 2.53 3.37 2.86 ]
  step 4 qfunc [ 2.94 2.90 2.69 3.38 ]
  step 5 qfunc [ 3.22 2.59 3.39 2.72 ]
11: score 1.000 initialq 3.29 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.29 3.09 2.39 2.26 ]
  step 1 qfunc [ 2.42 3.22 2.53 2.42 ]
  step 2 qfunc [ 2.30 2.46 2.53 3.35 ]
  step 3 qfunc [ 2.71 2.55 3.38 2.90 ]
  step 4 qfunc [ 3.18 3.18 2.82 3.40 ]
  step 5 qfunc [ 3.39 2.69 3.43 2.64 ]
12: score 0.966 initialq 3.29 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.20 3.11 2.36 2.23 ]
  step 1 qfunc [ 2.34 3.22 2.57 2.39 ]
  step 2 qfunc [ 2.47 2.62 2.73 3.29 ]
  step 3 qfunc [ 2.68 2.60 3.31 2.68 ]
  step 4 qfunc [ 3.22 3.05 2.83 3.33 ]
  step 5 qfunc [ 3.34 2.96 3.34 2.74 ]
13: score 1.000 initialq 3.28 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.22 3.11 2.25 2.17 ]
  step 1 qfunc [ 2.38 3.22 2.56 2.37 ]
  step 2 qfunc [ 2.32 2.55 2.54 3.38 ]
  step 3 qfunc [ 2.68 2.70 3.40 2.92 ]
  step 4 qfunc [ 3.04 2.96 2.76 3.41 ]
  step 5 qfunc [ 3.28 3.05 3.48 2.85 ]
14: score 1.000 initialq 3.27 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.10 2.40 2.26 ]
  step 1 qfunc [ 2.42 3.19 2.56 2.38 ]
  step 2 qfunc [ 2.21 2.60 2.53 3.28 ]
  step 3 qfunc [ 3.08 2.81 3.31 3.10 ]
  step 4 qfunc [ 3.22 3.07 3.30 3.34 ]
  step 5 qfunc [ 3.21 3.08 3.37 2.73 ]
15: score 1.000 initialq 3.26 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.31 3.11 2.44 2.27 ]
  step 1 qfunc [ 2.44 2.54 3.21 2.46 ]
  step 2 qfunc [ 2.54 3.24 2.53 2.57 ]
  step 3 qfunc [ 2.61 2.92 2.60 3.32 ]
  step 4 qfunc [ 3.03 3.09 2.80 3.33 ]
  step 5 qfunc [ 3.14 2.81 3.39 2.65 ]
16: score 0.742 initialq 3.26 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.24 3.02 2.38 2.24 ]
  step 1 qfunc [ 2.32 3.17 2.41 2.35 ]
  step 2 qfunc [ 2.36 2.59 2.64 3.27 ]
  step 3 qfunc [ 2.68 2.80 2.82 3.30 ]
  step 4 qfunc [ 3.25 3.09 3.36 2.97 ]
  step 5 qfunc [ 3.38 3.31 3.08 2.86 ]
17: score 1.000 initialq 3.26 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.07 2.49 2.25 ]
  step 1 qfunc [ 2.43 3.18 2.65 2.41 ]
  step 2 qfunc [ 2.59 2.60 2.89 3.31 ]
  step 3 qfunc [ 2.95 2.70 3.33 3.13 ]
  step 4 qfunc [ 3.21 3.08 3.25 3.36 ]
  step 5 qfunc [ 2.98 2.79 3.40 2.65 ]
18: score 1.000 initialq 3.25 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.25 3.10 2.34 2.17 ]
  step 1 qfunc [ 2.30 3.19 2.50 2.34 ]
  step 2 qfunc [ 2.44 2.66 2.53 3.26 ]
  step 3 qfunc [ 2.71 2.72 3.28 2.97 ]
  step 4 qfunc [ 3.10 3.15 3.07 3.30 ]
  step 5 qfunc [ 3.34 3.19 3.34 3.17 ]
19: score 0.500 initialq 3.25 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.27 3.05 2.41 2.19 ]
  step 1 qfunc [ 2.40 3.17 2.57 2.44 ]
  step 2 qfunc [ 2.52 2.66 2.59 3.26 ]
  step 3 qfunc [ 2.90 2.80 3.29 2.70 ]
  step 4 qfunc [ 3.31 3.22 2.67 3.27 ]
  step 5 qfunc [ 3.32 3.18 2.87 2.83 ]
20: score 1.000 initialq 3.25 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.26 3.10 2.47 2.25 ]
  step 1 qfunc [ 2.44 2.59 3.19 2.46 ]
  step 2 qfunc [ 2.55 3.23 2.52 2.59 ]
  step 3 qfunc [ 2.77 2.81 2.61 3.31 ]
  step 4 qfunc [ 3.13 2.96 2.98 3.33 ]
  step 5 qfunc [ 3.16 3.03 3.41 2.83 ]
21: score 1.000 initialq 3.25 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.19 3.07 2.31 2.18 ]
  step 1 qfunc [ 2.43 3.18 2.49 2.38 ]
  step 2 qfunc [ 2.50 2.52 2.53 3.33 ]
  step 3 qfunc [ 2.81 2.71 3.35 3.05 ]
  step 4 qfunc [ 2.96 3.15 2.63 3.36 ]
  step 5 qfunc [ 3.40 3.10 3.41 2.79 ]
22: score 1.000 initialq 3.25 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.25 3.06 2.33 2.27 ]
  step 1 qfunc [ 2.39 3.16 2.50 2.37 ]
  step 2 qfunc [ 2.48 2.56 2.52 3.29 ]
  step 3 qfunc [ 2.86 2.63 3.10 3.31 ]
  step 4 qfunc [ 3.04 2.99 3.38 2.71 ]
  step 5 qfunc [ 3.32 3.26 3.40 2.94 ]
23: score 1.000 initialq 3.22 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.24 3.01 2.34 2.23 ]
  step 1 qfunc [ 2.32 3.14 2.42 2.30 ]
  step 2 qfunc [ 2.24 2.48 2.43 3.25 ]
  step 3 qfunc [ 2.72 2.56 2.38 3.28 ]
  step 4 qfunc [ 3.14 2.72 3.28 2.77 ]
  step 5 qfunc [ 3.24 3.03 3.30 3.01 ]
24: score 1.000 initialq 3.22 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 3.06 2.39 2.24 ]
  step 1 qfunc [ 2.38 3.17 2.61 2.41 ]
  step 2 qfunc [ 2.36 2.42 2.63 3.33 ]
  step 3 qfunc [ 2.57 2.50 3.35 3.04 ]
  step 4 qfunc [ 2.91 2.98 2.78 3.36 ]
  step 5 qfunc [ 3.37 3.18 3.44 3.10 ]
25: score 0.750 initialq 3.22 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.26 2.96 2.34 2.26 ]
  step 1 qfunc [ 2.35 3.14 2.44 2.39 ]
  step 2 qfunc [ 2.27 2.43 2.53 3.29 ]
  step 3 qfunc [ 2.48 2.38 2.52 3.31 ]
  step 4 qfunc [ 2.62 2.58 3.41 3.02 ]
  step 5 qfunc [ 2.98 3.19 2.78 3.42 ]
26: score 1.000 initialq 3.21 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.27 3.04 2.48 2.32 ]
  step 1 qfunc [ 2.47 2.57 3.15 2.45 ]
  step 2 qfunc [ 2.55 3.19 2.55 2.55 ]
  step 3 qfunc [ 2.63 2.83 2.57 3.29 ]
  step 4 qfunc [ 2.93 2.76 3.06 3.30 ]
  step 5 qfunc [ 2.65 2.52 3.30 2.55 ]
27: score 1.000 initialq 3.21 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.27 3.05 2.38 2.23 ]
  step 1 qfunc [ 2.36 3.14 2.51 2.39 ]
  step 2 qfunc [ 2.43 2.60 2.60 3.28 ]
  step 3 qfunc [ 2.79 2.74 3.29 2.89 ]
  step 4 qfunc [ 3.16 3.06 3.16 3.32 ]
  step 5 qfunc [ 3.09 2.81 3.37 2.71 ]
28: score 0.750 initialq 3.20 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.28 2.97 2.39 2.21 ]
  step 1 qfunc [ 2.42 3.10 2.55 2.39 ]
  step 2 qfunc [ 2.58 2.59 2.57 3.21 ]
  step 3 qfunc [ 2.87 2.64 2.80 3.24 ]
  step 4 qfunc [ 3.26 2.92 3.20 3.02 ]
  step 5 qfunc [ 2.63 2.62 3.28 2.56 ]
29: score 0.999 initialq 3.20 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.24 3.03 2.47 2.29 ]
  step 1 qfunc [ 2.45 2.58 3.13 2.44 ]
  step 2 qfunc [ 2.61 3.18 2.57 2.61 ]
  step 3 qfunc [ 2.74 2.83 2.59 3.28 ]
  step 4 qfunc [ 2.93 2.93 2.73 3.29 ]
  step 5 qfunc [ 3.26 3.10 3.29 3.20 ]
30: score 1.000 initialq 3.18 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.25 2.45 3.05 2.23 ]
  step 1 qfunc [ 2.23 3.13 2.30 2.30 ]
  step 2 qfunc [ 2.51 3.16 2.46 2.54 ]
  step 3 qfunc [ 2.65 2.79 2.63 3.28 ]
  step 4 qfunc [ 3.15 2.84 3.11 3.29 ]
  step 5 qfunc [ 3.06 2.70 3.44 2.98 ]
31: score 0.993 initialq 3.17 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 2.96 2.37 2.26 ]
  step 1 qfunc [ 2.34 3.10 2.47 2.35 ]
  step 2 qfunc [ 2.38 2.63 2.50 3.24 ]
  step 3 qfunc [ 2.47 2.81 3.26 2.87 ]
  step 4 qfunc [ 2.84 2.74 2.69 3.27 ]
  step 5 qfunc [ 2.72 2.47 3.41 2.54 ]
32: score 0.500 initialq 3.17 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.19 2.97 2.31 2.18 ]
  step 1 qfunc [ 2.32 3.09 2.41 2.32 ]
  step 2 qfunc [ 2.35 3.25 2.42 2.60 ]
  step 3 qfunc [ 2.39 2.48 2.49 3.37 ]
  step 4 qfunc [ 2.88 2.68 3.40 2.64 ]
  step 5 qfunc [ 3.39 3.41 2.94 2.97 ]
33: score 0.988 initialq 3.17 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.26 2.92 2.36 2.26 ]
  step 1 qfunc [ 2.40 3.11 2.50 2.46 ]
  step 2 qfunc [ 2.54 2.44 3.30 2.59 ]
  step 3 qfunc [ 2.58 2.53 2.60 3.32 ]
  step 4 qfunc [ 2.78 2.45 2.72 3.34 ]
  step 5 qfunc [ 2.61 2.49 3.32 2.59 ]
34: score 0.504 initialq 3.17 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.24 3.00 2.41 2.26 ]
  step 1 qfunc [ 2.36 2.52 3.11 2.42 ]
  step 2 qfunc [ 2.48 3.14 2.50 2.49 ]
  step 3 qfunc [ 2.58 3.22 2.52 2.85 ]
  step 4 qfunc [ 3.01 3.10 2.55 3.30 ]
  step 5 qfunc [ 3.30 3.21 3.18 3.22 ]
35: score 0.999 initialq 3.16 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.20 2.98 2.38 2.25 ]
  step 1 qfunc [ 2.38 2.55 3.10 2.40 ]
  step 2 qfunc [ 2.47 3.15 2.48 2.58 ]
  step 3 qfunc [ 2.78 2.84 2.62 3.24 ]
  step 4 qfunc [ 3.06 2.94 2.63 3.26 ]
  step 5 qfunc [ 2.63 2.61 3.35 2.55 ]
36: score 1.000 initialq 3.16 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.28 2.98 2.40 2.23 ]
  step 1 qfunc [ 2.36 3.07 2.56 2.38 ]
  step 2 qfunc [ 2.56 2.67 2.72 3.26 ]
  step 3 qfunc [ 2.88 2.86 3.28 3.10 ]
  step 4 qfunc [ 3.22 3.07 3.30 3.30 ]
  step 5 qfunc [ 3.08 2.96 3.05 3.34 ]
37: score 0.750 initialq 3.16 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.22 2.92 2.38 2.23 ]
  step 1 qfunc [ 2.43 3.05 2.57 2.50 ]
  step 2 qfunc [ 2.65 2.72 2.60 3.19 ]
  step 3 qfunc [ 2.82 2.82 2.97 3.22 ]
  step 4 qfunc [ 3.30 3.10 3.19 2.60 ]
  step 5 qfunc [ 2.62 2.62 3.30 2.55 ]
38: score 0.750 initialq 3.15 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.28 2.86 2.41 2.25 ]
  step 1 qfunc [ 2.37 3.08 2.49 2.40 ]
  step 2 qfunc [ 2.35 2.55 2.49 3.25 ]
  step 3 qfunc [ 2.47 2.50 2.54 3.27 ]
  step 4 qfunc [ 2.65 2.49 3.32 3.26 ]
  step 5 qfunc [ 3.34 3.14 2.92 3.28 ]
39: score 0.750 initialq 3.15 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.28 2.91 2.43 2.28 ]
  step 1 qfunc [ 2.39 3.07 2.51 2.38 ]
  step 2 qfunc [ 2.46 2.49 2.53 3.21 ]
  step 3 qfunc [ 2.56 2.47 2.75 3.24 ]
  step 4 qfunc [ 3.02 2.90 3.34 2.87 ]
  step 5 qfunc [ 3.36 3.29 3.14 3.28 ]
40: score 1.000 initialq 3.14 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.23 2.95 2.31 2.24 ]
  step 1 qfunc [ 2.31 3.07 2.35 2.31 ]
  step 2 qfunc [ 2.47 2.53 2.38 3.17 ]
  step 3 qfunc [ 2.82 2.70 3.20 2.93 ]
  step 4 qfunc [ 2.38 2.70 2.32 3.21 ]
  step 5 qfunc [ 3.29 3.11 3.47 2.84 ]
41: score 0.750 initialq 3.13 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.21 2.90 2.29 2.16 ]
  step 1 qfunc [ 2.32 3.05 2.44 2.38 ]
  step 2 qfunc [ 2.30 2.41 2.42 3.21 ]
  step 3 qfunc [ 2.40 2.55 2.50 3.23 ]
  step 4 qfunc [ 2.93 3.09 3.41 3.19 ]
  step 5 qfunc [ 3.36 3.28 3.22 3.43 ]
42: score 0.500 initialq 3.12 opt [[1 1 3 1 0 2]]
  step 0 qfunc [ 2.26 2.85 2.41 2.25 ]
  step 1 qfunc [ 2.35 3.03 2.48 2.35 ]
  step 2 qfunc [ 2.33 2.57 2.47 3.21 ]
  step 3 qfunc [ 2.81 3.24 2.19 2.84 ]
  step 4 qfunc [ 3.27 2.56 2.54 2.97 ]
  step 5 qfunc [ 2.62 2.53 3.33 2.61 ]
43: score 1.000 initialq 3.12 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.19 2.91 2.29 2.23 ]
  step 1 qfunc [ 2.27 3.04 2.38 2.34 ]
  step 2 qfunc [ 2.38 2.42 2.49 3.20 ]
  step 3 qfunc [ 2.65 2.55 2.90 3.22 ]
  step 4 qfunc [ 2.98 2.81 3.32 2.72 ]
  step 5 qfunc [ 3.22 3.31 3.34 2.93 ]
44: score 1.000 initialq 3.12 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.23 2.91 2.40 2.24 ]
  step 1 qfunc [ 2.38 2.43 3.05 2.40 ]
  step 2 qfunc [ 2.51 3.10 2.53 2.52 ]
  step 3 qfunc [ 2.59 2.79 2.54 3.19 ]
  step 4 qfunc [ 2.76 2.68 2.64 3.20 ]
  step 5 qfunc [ 2.59 2.44 3.47 2.56 ]
45: score 0.500 initialq 3.12 opt [[1 1 1 3 0 2]]
  step 0 qfunc [ 2.25 2.92 2.37 2.23 ]
  step 1 qfunc [ 2.37 3.06 2.47 2.40 ]
  step 2 qfunc [ 2.29 3.21 2.53 2.67 ]
  step 3 qfunc [ 2.59 2.62 2.83 3.33 ]
  step 4 qfunc [ 3.36 3.07 3.36 3.16 ]
  step 5 qfunc [ 2.55 2.49 3.38 2.56 ]
46: score 0.996 initialq 3.11 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.31 2.91 2.49 2.30 ]
  step 1 qfunc [ 2.41 2.51 3.05 2.41 ]
  step 2 qfunc [ 2.50 3.09 2.51 2.52 ]
  step 3 qfunc [ 2.54 2.86 2.53 3.21 ]
  step 4 qfunc [ 2.66 2.83 2.65 3.22 ]
  step 5 qfunc [ 2.92 2.40 3.29 2.67 ]
47: score 0.750 initialq 3.11 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.30 2.81 2.44 2.31 ]
  step 1 qfunc [ 2.44 3.02 2.52 2.43 ]
  step 2 qfunc [ 2.39 2.45 2.53 3.22 ]
  step 3 qfunc [ 2.39 2.42 2.58 3.24 ]
  step 4 qfunc [ 3.37 3.22 3.38 3.08 ]
  step 5 qfunc [ 3.37 3.40 3.05 3.19 ]
48: score 0.539 initialq 3.11 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.25 2.86 2.36 2.24 ]
  step 1 qfunc [ 2.38 2.42 3.04 2.38 ]
  step 2 qfunc [ 2.49 3.11 2.55 2.54 ]
  step 3 qfunc [ 2.48 3.13 2.42 2.45 ]
  step 4 qfunc [ 2.54 2.58 2.43 3.19 ]
  step 5 qfunc [ 3.20 2.88 2.74 2.79 ]
49: score 0.750 initialq 3.08 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.31 2.85 2.44 2.29 ]
  step 1 qfunc [ 2.40 2.99 2.56 2.44 ]
  step 2 qfunc [ 2.43 2.55 2.56 3.23 ]
  step 3 qfunc [ 2.54 2.52 2.78 3.25 ]
  step 4 qfunc [ 2.80 2.51 3.37 3.06 ]
  step 5 qfunc [ 3.34 3.20 3.32 3.40 ]
50: score 0.753 initialq 3.07 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.27 2.83 2.43 2.25 ]
  step 1 qfunc [ 2.40 2.96 2.53 2.41 ]
  step 2 qfunc [ 2.40 2.44 2.61 3.19 ]
  step 3 qfunc [ 2.42 2.56 2.89 3.22 ]
  step 4 qfunc [ 2.81 2.86 3.35 2.98 ]
  step 5 qfunc [ 3.37 3.26 3.14 3.04 ]
51: score 0.518 initialq 3.06 opt [[1 1 1 3 2 0]]
  step 0 qfunc [ 2.22 2.77 2.31 2.23 ]
  step 1 qfunc [ 2.34 2.98 2.41 2.35 ]
  step 2 qfunc [ 2.30 3.14 2.36 2.60 ]
  step 3 qfunc [ 2.32 2.37 2.31 3.23 ]
  step 4 qfunc [ 2.93 2.76 3.25 2.65 ]
  step 5 qfunc [ 3.26 2.81 2.94 2.71 ]
52: score 0.755 initialq 3.04 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.19 2.80 2.32 2.17 ]
  step 1 qfunc [ 2.30 2.95 2.45 2.31 ]
  step 2 qfunc [ 2.34 2.35 2.47 3.13 ]
  step 3 qfunc [ 2.59 2.49 2.69 3.16 ]
  step 4 qfunc [ 3.18 2.63 3.18 2.94 ]
  step 5 qfunc [ 3.14 2.98 3.20 3.20 ]
53: score 0.770 initialq 3.02 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.28 2.75 2.44 2.28 ]
  step 1 qfunc [ 2.43 2.52 2.88 2.43 ]
  step 2 qfunc [ 2.53 2.93 2.52 2.55 ]
  step 3 qfunc [ 2.55 3.03 2.60 2.76 ]
  step 4 qfunc [ 2.59 2.60 2.76 3.13 ]
  step 5 qfunc [ 2.92 2.81 3.13 3.01 ]
54: score 0.750 initialq 3.02 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.74 2.42 2.28 ]
  step 1 qfunc [ 2.39 2.93 2.51 2.40 ]
  step 2 qfunc [ 2.37 3.17 2.54 2.52 ]
  step 3 qfunc [ 2.43 2.41 2.55 3.32 ]
  step 4 qfunc [ 2.60 2.66 3.05 3.34 ]
  step 5 qfunc [ 2.52 2.47 3.45 2.53 ]
55: score 0.794 initialq 3.02 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.27 2.53 2.80 2.29 ]
  step 1 qfunc [ 2.35 2.93 2.37 2.40 ]
  step 2 qfunc [ 2.57 2.98 2.58 2.58 ]
  step 3 qfunc [ 2.66 3.06 2.60 2.77 ]
  step 4 qfunc [ 2.79 2.78 2.67 3.15 ]
  step 5 qfunc [ 3.04 2.85 3.16 3.09 ]
56: score 0.500 initialq 3.01 opt [[1 1 3 1 2 1]]
  step 0 qfunc [ 2.24 2.76 2.33 2.24 ]
  step 1 qfunc [ 2.36 2.92 2.45 2.38 ]
  step 2 qfunc [ 2.36 2.48 2.47 3.07 ]
  step 3 qfunc [ 2.48 3.10 2.38 2.68 ]
  step 4 qfunc [ 2.80 2.81 3.16 2.78 ]
  step 5 qfunc [ 3.15 3.18 2.55 3.10 ]
57: score 0.750 initialq 3.00 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.76 2.46 2.28 ]
  step 1 qfunc [ 2.42 2.91 2.56 2.45 ]
  step 2 qfunc [ 2.42 3.12 2.63 2.64 ]
  step 3 qfunc [ 2.49 2.52 2.67 3.30 ]
  step 4 qfunc [ 2.75 2.77 3.31 3.32 ]
  step 5 qfunc [ 2.57 2.48 3.34 2.55 ]
58: score 0.750 initialq 2.99 opt [[1 2 1 3 3 0]]
  step 0 qfunc [ 2.23 2.83 2.38 2.21 ]
  step 1 qfunc [ 2.34 2.52 2.93 2.37 ]
  step 2 qfunc [ 2.50 2.97 2.49 2.54 ]
  step 3 qfunc [ 2.58 2.81 2.70 3.06 ]
  step 4 qfunc [ 2.77 2.78 3.01 3.08 ]
  step 5 qfunc [ 3.09 2.52 3.07 2.67 ]
59: score 0.500 initialq 2.99 opt [[2 1 1 1 1 3]]
  step 0 qfunc [ 2.25 2.45 2.77 2.26 ]
  step 1 qfunc [ 2.29 2.90 2.35 2.32 ]
  step 2 qfunc [ 2.53 2.96 2.55 2.55 ]
  step 3 qfunc [ 2.67 3.03 2.58 2.77 ]
  step 4 qfunc [ 2.94 3.08 2.56 3.05 ]
  step 5 qfunc [ 2.66 2.57 2.60 3.10 ]
60: score 0.500 initialq 2.98 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.25 2.78 2.42 2.26 ]
  step 1 qfunc [ 2.39 2.51 2.91 2.42 ]
  step 2 qfunc [ 2.48 2.95 2.50 2.53 ]
  step 3 qfunc [ 2.49 3.05 2.57 2.74 ]
  step 4 qfunc [ 2.64 2.62 2.68 3.15 ]
  step 5 qfunc [ 3.15 3.08 3.11 2.98 ]
61: score 0.961 initialq 2.98 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.27 2.80 2.39 2.25 ]
  step 1 qfunc [ 2.37 2.92 2.54 2.41 ]
  step 2 qfunc [ 2.38 2.74 2.57 3.13 ]
  step 3 qfunc [ 2.60 2.68 3.15 2.81 ]
  step 4 qfunc [ 2.87 2.90 2.82 3.16 ]
  step 5 qfunc [ 2.51 2.42 3.15 2.52 ]
62: score 0.750 initialq 2.98 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.24 2.66 2.32 2.23 ]
  step 1 qfunc [ 2.31 2.88 2.41 2.32 ]
  step 2 qfunc [ 2.31 2.44 2.41 3.04 ]
  step 3 qfunc [ 2.41 2.44 2.41 3.07 ]
  step 4 qfunc [ 2.67 2.87 3.21 2.57 ]
  step 5 qfunc [ 3.10 3.22 2.89 2.60 ]
63: score 0.849 initialq 2.97 opt [[1 1 3 3 1 2]]
  step 0 qfunc [ 2.33 2.49 2.46 2.31 ]
  step 1 qfunc [ 2.43 2.81 2.53 2.45 ]
  step 2 qfunc [ 2.42 2.46 2.54 3.06 ]
  step 3 qfunc [ 2.36 2.53 2.56 3.09 ]
  step 4 qfunc [ 2.55 3.19 3.12 2.67 ]
  step 5 qfunc [ 2.42 2.31 3.31 2.50 ]
64: score 1.000 initialq 2.95 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.30 2.59 2.47 2.28 ]
  step 1 qfunc [ 2.39 2.84 2.57 2.44 ]
  step 2 qfunc [ 2.25 2.45 2.55 3.05 ]
  step 3 qfunc [ 2.34 2.37 2.66 3.08 ]
  step 4 qfunc [ 2.67 2.68 3.10 2.96 ]
  step 5 qfunc [ 3.13 3.00 3.01 3.07 ]
65: score 0.500 initialq 2.94 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.26 2.62 2.43 2.29 ]
  step 1 qfunc [ 2.42 2.80 2.54 2.43 ]
  step 2 qfunc [ 2.41 2.97 2.57 2.73 ]
  step 3 qfunc [ 2.34 2.54 2.56 3.03 ]
  step 4 qfunc [ 2.92 2.96 3.07 2.66 ]
  step 5 qfunc [ 3.02 3.09 2.76 2.94 ]
66: score 0.996 initialq 2.93 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.24 2.70 2.49 2.28 ]
  step 1 qfunc [ 2.41 2.86 2.59 2.44 ]
  step 2 qfunc [ 2.36 2.55 2.57 3.04 ]
  step 3 qfunc [ 2.48 2.72 3.07 2.68 ]
  step 4 qfunc [ 2.83 2.95 2.58 3.08 ]
  step 5 qfunc [ 3.07 2.53 3.09 2.66 ]
67: score 0.991 initialq 2.93 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.33 2.49 2.67 2.32 ]
  step 1 qfunc [ 2.39 2.84 2.39 2.40 ]
  step 2 qfunc [ 2.55 2.90 2.55 2.56 ]
  step 3 qfunc [ 2.59 2.68 2.51 3.00 ]
  step 4 qfunc [ 2.66 2.69 2.57 3.01 ]
  step 5 qfunc [ 2.57 2.49 3.30 2.54 ]
68: score 0.500 initialq 2.93 opt [[1 1 1 3 2 0]]
  step 0 qfunc [ 2.26 2.69 2.41 2.28 ]
  step 1 qfunc [ 2.39 2.84 2.51 2.40 ]
  step 2 qfunc [ 2.39 3.04 2.58 2.58 ]
  step 3 qfunc [ 2.39 2.43 2.62 3.13 ]
  step 4 qfunc [ 2.78 2.96 3.14 2.82 ]
  step 5 qfunc [ 3.16 3.07 3.15 2.94 ]
69: score 0.513 initialq 2.92 opt [[1 1 1 3 2 1]]
  step 0 qfunc [ 2.24 2.67 2.38 2.23 ]
  step 1 qfunc [ 2.36 2.80 2.47 2.36 ]
  step 2 qfunc [ 2.35 2.99 2.52 2.60 ]
  step 3 qfunc [ 2.37 2.51 2.62 3.14 ]
  step 4 qfunc [ 2.66 2.67 3.15 2.71 ]
  step 5 qfunc [ 3.13 3.17 2.91 3.01 ]
70: score 0.751 initialq 2.91 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.26 2.53 2.37 2.25 ]
  step 1 qfunc [ 2.34 2.76 2.47 2.36 ]
  step 2 qfunc [ 2.36 2.40 2.59 2.93 ]
  step 3 qfunc [ 2.36 2.47 2.59 2.96 ]
  step 4 qfunc [ 3.01 2.78 3.07 2.63 ]
  step 5 qfunc [ 3.09 2.96 2.79 2.69 ]
71: score 0.750 initialq 2.90 opt [[1 1 3 3 2 0]]
  step 0 qfunc [ 2.25 2.61 2.38 2.24 ]
  step 1 qfunc [ 2.35 2.80 2.43 2.36 ]
  step 2 qfunc [ 2.34 2.52 2.43 3.01 ]
  step 3 qfunc [ 2.62 2.44 2.48 3.04 ]
  step 4 qfunc [ 2.97 2.57 3.21 2.61 ]
  step 5 qfunc [ 3.23 3.17 2.95 3.01 ]
72: score 0.750 initialq 2.90 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.25 2.46 2.63 2.26 ]
  step 1 qfunc [ 2.31 2.79 2.31 2.34 ]
  step 2 qfunc [ 2.49 2.86 2.48 2.52 ]
  step 3 qfunc [ 2.54 2.98 2.51 2.74 ]
  step 4 qfunc [ 2.71 2.66 2.67 3.11 ]
  step 5 qfunc [ 2.99 2.83 3.11 2.90 ]
73: score 0.500 initialq 2.90 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.24 2.46 2.60 2.28 ]
  step 1 qfunc [ 2.29 2.81 2.31 2.34 ]
  step 2 qfunc [ 2.50 2.89 2.48 2.52 ]
  step 3 qfunc [ 2.51 2.70 2.46 2.97 ]
  step 4 qfunc [ 2.83 2.52 2.43 2.99 ]
  step 5 qfunc [ 2.56 2.48 3.16 2.54 ]
74: score 0.750 initialq 2.87 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.30 2.60 2.47 2.29 ]
  step 1 qfunc [ 2.41 2.47 2.79 2.41 ]
  step 2 qfunc [ 2.52 2.85 2.50 2.53 ]
  step 3 qfunc [ 2.56 2.94 2.55 2.69 ]
  step 4 qfunc [ 2.61 2.66 2.63 3.05 ]
  step 5 qfunc [ 2.74 2.66 2.80 3.04 ]
75: score 0.750 initialq 2.84 opt [[1 1 3 3 2 1]]
  step 0 qfunc [ 2.28 2.61 2.37 2.28 ]
  step 1 qfunc [ 2.36 2.80 2.43 2.37 ]
  step 2 qfunc [ 2.31 2.43 2.42 3.00 ]
  step 3 qfunc [ 2.39 2.43 2.43 3.03 ]
  step 4 qfunc [ 2.82 2.55 3.15 2.72 ]
  step 5 qfunc [ 3.12 3.17 3.00 2.88 ]
76: score 0.500 initialq 2.84 opt [[1 1 1 3 3 2]]
  step 0 qfunc [ 2.29 2.58 2.42 2.30 ]
  step 1 qfunc [ 2.37 2.73 2.50 2.39 ]
  step 2 qfunc [ 2.37 2.88 2.53 2.48 ]
  step 3 qfunc [ 2.40 2.65 2.56 2.99 ]
  step 4 qfunc [ 2.58 2.60 2.98 3.00 ]
  step 5 qfunc [ 2.52 2.40 3.02 2.52 ]
77: score 0.888 initialq 2.83 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.27 2.62 2.41 2.24 ]
  step 1 qfunc [ 2.37 2.45 2.75 2.39 ]
  step 2 qfunc [ 2.49 2.80 2.49 2.50 ]
  step 3 qfunc [ 2.51 2.81 2.52 2.91 ]
  step 4 qfunc [ 2.61 2.77 2.60 2.92 ]
  step 5 qfunc [ 2.61 2.32 3.41 2.56 ]
78: score 0.750 initialq 2.83 opt [[1 1 3 2 1 3]]
  step 0 qfunc [ 2.24 2.52 2.44 2.24 ]
  step 1 qfunc [ 2.33 2.70 2.49 2.39 ]
  step 2 qfunc [ 2.45 2.56 2.60 2.85 ]
  step 3 qfunc [ 2.50 2.59 2.89 2.70 ]
  step 4 qfunc [ 2.65 2.91 2.51 2.72 ]
  step 5 qfunc [ 2.75 2.56 2.51 2.95 ]
79: score 0.750 initialq 2.82 opt [[1 3 1 3 2 3]]
  step 0 qfunc [ 2.28 2.44 2.39 2.30 ]
  step 1 qfunc [ 2.38 2.39 2.48 2.73 ]
  step 2 qfunc [ 2.33 2.81 2.48 2.35 ]
  step 3 qfunc [ 2.22 2.35 2.44 2.93 ]
  step 4 qfunc [ 2.55 2.21 2.96 2.64 ]
  step 5 qfunc [ 2.83 2.59 2.52 2.97 ]
80: score 0.500 initialq 2.80 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.28 2.58 2.43 2.28 ]
  step 1 qfunc [ 2.41 2.48 2.72 2.41 ]
  step 2 qfunc [ 2.52 2.78 2.48 2.51 ]
  step 3 qfunc [ 2.61 2.87 2.50 2.69 ]
  step 4 qfunc [ 2.69 2.71 2.49 2.93 ]
  step 5 qfunc [ 2.93 2.93 2.61 2.77 ]
81: score 0.526 initialq 2.77 opt [[1 1 1 1 3 2]]
  step 0 qfunc [ 2.30 2.43 2.40 2.31 ]
  step 1 qfunc [ 2.38 2.53 2.47 2.40 ]
  step 2 qfunc [ 2.35 2.81 2.47 2.44 ]
  step 3 qfunc [ 2.35 2.87 2.48 2.59 ]
  step 4 qfunc [ 2.27 2.39 2.44 2.89 ]
  step 5 qfunc [ 2.55 2.44 2.91 2.54 ]
82: score 0.712 initialq 2.76 opt [[2 1 1 1 3 2]]
  step 0 qfunc [ 2.20 2.37 2.52 2.24 ]
  step 1 qfunc [ 2.29 2.72 2.32 2.37 ]
  step 2 qfunc [ 2.48 2.80 2.45 2.49 ]
  step 3 qfunc [ 2.45 2.91 2.44 2.46 ]
  step 4 qfunc [ 2.46 2.35 2.44 2.97 ]
  step 5 qfunc [ 2.79 2.00 2.97 2.64 ]
83: score 0.500 initialq 2.73 opt [[2 1 3 1 3 0]]
  step 0 qfunc [ 2.30 2.43 2.45 2.29 ]
  step 1 qfunc [ 2.34 2.65 2.37 2.38 ]
  step 2 qfunc [ 2.49 2.35 2.50 2.73 ]
  step 3 qfunc [ 2.53 2.76 2.50 2.55 ]
  step 4 qfunc [ 2.76 2.60 2.35 2.83 ]
  step 5 qfunc [ 2.84 2.48 2.06 2.58 ]
84: score 0.750 initialq 2.72 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.29 2.45 2.44 2.28 ]
  step 1 qfunc [ 2.42 2.45 2.49 2.44 ]
  step 2 qfunc [ 2.49 2.51 2.49 2.49 ]
  step 3 qfunc [ 2.51 2.55 2.49 2.53 ]
  step 4 qfunc [ 2.55 2.54 2.56 2.62 ]
  step 5 qfunc [ 2.57 2.52 2.59 2.65 ]
85: score 0.750 initialq 2.67 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.28 2.40 2.39 2.26 ]
  step 1 qfunc [ 2.37 2.39 2.44 2.39 ]
  step 2 qfunc [ 2.43 2.46 2.44 2.44 ]
  step 3 qfunc [ 2.43 2.49 2.46 2.47 ]
  step 4 qfunc [ 2.46 2.48 2.49 2.54 ]
  step 5 qfunc [ 2.47 2.52 2.51 2.56 ]
86: score 0.853 initialq 2.65 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.30 2.47 2.44 2.32 ]
  step 1 qfunc [ 2.44 2.47 2.51 2.45 ]
  step 2 qfunc [ 2.51 2.52 2.50 2.50 ]
  step 3 qfunc [ 2.52 2.60 2.49 2.54 ]
  step 4 qfunc [ 2.55 2.50 2.51 2.69 ]
  step 5 qfunc [ 2.54 2.36 2.70 2.54 ]
87: score 0.750 initialq 2.62 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.30 2.46 2.46 2.30 ]
  step 1 qfunc [ 2.40 2.43 2.51 2.42 ]
  step 2 qfunc [ 2.51 2.53 2.50 2.51 ]
  step 3 qfunc [ 2.52 2.62 2.50 2.53 ]
  step 4 qfunc [ 2.54 2.59 2.54 2.70 ]
  step 5 qfunc [ 2.57 2.55 2.58 2.71 ]
88: score 0.536 initialq 2.58 opt [[1 2 1 0 1 3]]
  step 0 qfunc [ 2.32 2.51 2.49 2.32 ]
  step 1 qfunc [ 2.45 2.46 2.56 2.48 ]
  step 2 qfunc [ 2.55 2.56 2.56 2.56 ]
  step 3 qfunc [ 2.57 2.57 2.57 2.56 ]
  step 4 qfunc [ 2.57 2.60 2.54 2.58 ]
  step 5 qfunc [ 2.58 2.52 2.52 2.65 ]
89: score 1.000 initialq 2.56 opt [[1 2 2 1 3 3]]
  step 0 qfunc [ 2.24 2.41 2.37 2.27 ]
  step 1 qfunc [ 2.35 2.37 2.46 2.38 ]
  step 2 qfunc [ 2.44 2.46 2.48 2.44 ]
  step 3 qfunc [ 2.47 2.49 2.48 2.46 ]
  step 4 qfunc [ 2.49 2.50 2.50 2.52 ]
  step 5 qfunc [ 2.50 2.47 2.53 2.55 ]
90: score 0.736 initialq 2.55 opt [[2 1 1 1 3 3]]
  step 0 qfunc [ 2.31 2.49 2.49 2.32 ]
  step 1 qfunc [ 2.43 2.54 2.41 2.43 ]
  step 2 qfunc [ 2.54 2.55 2.55 2.54 ]
  step 3 qfunc [ 2.54 2.60 2.56 2.52 ]
  step 4 qfunc [ 2.56 2.64 2.59 2.67 ]
  step 5 qfunc [ 2.58 2.55 2.61 2.70 ]
91: score 0.750 initialq 2.54 opt [[1 2 1 1 2 1]]
  step 0 qfunc [ 2.26 2.37 2.34 2.26 ]
  step 1 qfunc [ 2.35 2.38 2.41 2.38 ]
  step 2 qfunc [ 2.41 2.42 2.40 2.41 ]
  step 3 qfunc [ 2.42 2.44 2.43 2.41 ]
  step 4 qfunc [ 2.44 2.45 2.47 2.44 ]
  step 5 qfunc [ 2.46 2.50 2.43 2.46 ]
92: score 0.500 initialq 2.51 opt [[1 2 1 3 0 3]]
  step 0 qfunc [ 2.29 2.45 2.42 2.28 ]
  step 1 qfunc [ 2.42 2.42 2.50 2.45 ]
  step 2 qfunc [ 2.50 2.51 2.49 2.50 ]
  step 3 qfunc [ 2.52 2.54 2.45 2.55 ]
  step 4 qfunc [ 2.60 2.49 2.46 2.57 ]
  step 5 qfunc [ 2.62 2.50 2.48 2.66 ]
93: score 0.250 initialq 2.49 opt [[1 2 0 0 0 0]]
  step 0 qfunc [ 2.30 2.46 2.46 2.30 ]
  step 1 qfunc [ 2.43 2.42 2.51 2.44 ]
  step 2 qfunc [ 2.52 2.48 2.51 2.52 ]
  step 3 qfunc [ 2.53 2.50 2.52 2.53 ]
  step 4 qfunc [ 2.54 2.52 2.53 2.53 ]
  step 5 qfunc [ 2.54 2.53 2.54 2.53 ]
94: score 0.646 initialq 2.48 opt [[2 1 0 1 1 0]]
  step 0 qfunc [ 2.30 2.43 2.43 2.29 ]
  step 1 qfunc [ 2.40 2.48 2.40 2.44 ]
  step 2 qfunc [ 2.49 2.49 2.49 2.48 ]
  step 3 qfunc [ 2.49 2.51 2.49 2.48 ]
  step 4 qfunc [ 2.51 2.52 2.44 2.51 ]
  step 5 qfunc [ 2.55 2.49 2.50 2.53 ]
95: score 0.500 initialq 2.46 opt [[1 2 1 1 1 0]]
  step 0 qfunc [ 2.30 2.44 2.43 2.30 ]
  step 1 qfunc [ 2.40 2.41 2.49 2.43 ]
  step 2 qfunc [ 2.48 2.49 2.48 2.49 ]
  step 3 qfunc [ 2.48 2.50 2.49 2.48 ]
  step 4 qfunc [ 2.50 2.51 2.50 2.50 ]
  step 5 qfunc [ 2.52 2.50 2.51 2.52 ]
96: score 0.750 initialq 2.44 opt [[1 2 1 2 1 1]]
  step 0 qfunc [ 2.26 2.42 2.38 2.28 ]
  step 1 qfunc [ 2.37 2.40 2.46 2.40 ]
  step 2 qfunc [ 2.46 2.46 2.46 2.46 ]
  step 3 qfunc [ 2.46 2.46 2.46 2.46 ]
  step 4 qfunc [ 2.47 2.48 2.46 2.46 ]
  step 5 qfunc [ 2.48 2.49 2.47 2.47 ]
97: score 0.828 initialq 2.42 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.22 2.38 2.36 2.24 ]
  step 1 qfunc [ 2.33 2.36 2.43 2.36 ]
  step 2 qfunc [ 2.43 2.44 2.42 2.43 ]
  step 3 qfunc [ 2.45 2.43 2.45 2.45 ]
  step 4 qfunc [ 2.47 2.41 2.46 2.48 ]
  step 5 qfunc [ 2.01 1.87 2.76 2.01 ]
98: score 0.750 initialq 2.41 opt [[1 2 2 1 3 0]]
  step 0 qfunc [ 2.24 2.36 2.34 2.25 ]
  step 1 qfunc [ 2.34 2.35 2.41 2.34 ]
  step 2 qfunc [ 2.40 2.42 2.43 2.39 ]
  step 3 qfunc [ 2.42 2.46 2.44 2.43 ]
  step 4 qfunc [ 2.48 2.47 2.45 2.48 ]
  step 5 qfunc [ 2.50 2.48 2.44 2.47 ]
99: score 0.750 initialq 2.41 opt [[1 2 1 1 2 1]]
  step 0 qfunc [ 2.30 2.39 2.38 2.29 ]
  step 1 qfunc [ 2.37 2.38 2.43 2.39 ]
  step 2 qfunc [ 2.43 2.44 2.42 2.43 ]
  step 3 qfunc [ 2.43 2.45 2.44 2.43 ]
  step 4 qfunc [ 2.44 2.46 2.46 2.43 ]
  step 5 qfunc [ 2.47 2.48 2.44 2.46 ]
Out[13]:
[<matplotlib.lines.Line2D at 0x7f0b9f56aa10>]

In [11]:
'''
Checking correlations
'''
# Each result file is opened and the series taken from it is extracted right
# away, so every array sits next to the file it came from.
# `[:, 0]` keeps index 0 along the second axis of the loaded array.
data1 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')
vloss = data1['vloss']

data2 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')
trueqvals = data2['qvals'][:, 0]
scores = data2['scores'][:, 0]

data3 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real0-runB.npz')
falseqvals = data3['qvals'][:, 0]

data4 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/policies-rtype1-trajectories400-runB.npz')
rewards = data4['rewards'][:, 0]

# Scatter plot of scores against rewards. vloss, trueqvals and falseqvals are
# loaded above but not used by this plot.
# NOTE(review): the x axis is labelled 'real qvals' while the plotted x data is
# `rewards` -- confirm that the label matches the intended quantity.
figure()
ylabel('posttest scores')
xlabel('real qvals')
title('Horizon 5 Dropout=0.8 Epoch 60 Scores')
# Kept as the last expression so the cell's Out[] value is unchanged.
plot(rewards, scores, '.')


Out[11]:
[<matplotlib.lines.Line2D at 0x7fd36b1c2610>]

In [5]:
'''
Trying to find a stopping point for horizon 5 dropout 0.7
'''
data1 = np.load('experiments/test2_model_small-dropout7-shuffle0-data-test2-n100000-l5-random.pickle/stats-stopping.npz')
vloss = data1['vloss']

# x positions are 0 .. vloss.shape[1]-1. The mean and the standard error
# (std / sqrt(number of rows)) are both taken over axis 0.
xs = list(six.moves.range(vloss.shape[1]))
ys_mean = vloss.mean(axis=0)
ys_err = vloss.std(axis=0) / np.sqrt(vloss.shape[0])

# The reference level is the minimum of the mean curve; thresh is set to the
# same value, so the two horizontal lines drawn below coincide.
last_val = ys_mean.min()
thresh = last_val
# '{:4f}' is a minimum field width of 4 with the default six decimals.
six.print_('thresh {:4f} last val {:4f}'.format(thresh, last_val))

figure()
title('Horizon 5 Dropout=0.7 Val Loss')
xlabel('training epoch')
ylabel('val loss')

# Mean curve in blue, with the +/- one standard error band edges in pale blue.
plot(xs, ys_mean, color='#0000ff')
for band in (ys_mean + ys_err, ys_mean - ys_err):
    plot(xs, band, color='#ccccff')

# Horizontal green lines spanning the x range, at last_val and then at thresh.
for level in (last_val, thresh):
    plot([xs[0], xs[-1]], [level, level], color='#00ff00')

#xlim(20,40)
# ylim stays the last expression so the cell's Out[] value is unchanged.
ylim(0.009, 0.010)
# looks like there's a low point at 65


thresh 0.009242 last val 0.009242
Out[5]:
(0.009, 0.01)

In [5]:
'''
Checking correlations for no dropout

Combines the runA and runB result files of the dropout10 model and prints, for
every row ranked by descending initial q-value, its score, initial q-value,
`opts` entry and the q-function values of steps 0..5.
'''
data11 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runA.npz')
data12 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runA.npz')
data22 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runA.npz')
data52 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runA.npz')
data62 = np.load('experiments/test2_model_small-dropout10-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts10000-runB.npz')

# Stack runA on top of runB for every quantity (runA rows come first).
# `vloss` is assembled here but not used by the rest of this cell.
vloss = np.concatenate((data11['vloss'], data12['vloss']))
scores = np.concatenate((data21['scores'][:, 0], data22['scores'][:, 0]))
initialq = np.concatenate((data51['qvals'][:, 0], data52['qvals'][:, 0]))
opts = np.vstack((data61['opts'], data62['opts']))
qfuncs = np.vstack((data61['qs'][:, 0, :, :], data62['qs'][:, 0, :, :]))

# Ranking is by initial q-value, largest first (the index name says "score",
# but the sort key is `initialq`).
sorted_score_ix = np.argsort(initialq)[::-1]
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix, :]
sorted_qfuncs = qfuncs[sorted_score_ix, :, :]

row_fmt = '{:2d}: score {:.3f} initialq {:.2f} opt {}'
step_fmt = '  step {} qfunc [ {} ]'

for r in six.moves.range(scores.shape[0]):
    # Header line for rank r, followed by one line per step.
    six.print_(row_fmt.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r, :]))
    for t in six.moves.range(6):
        qtext = ' '.join('{:.2f}'.format(q) for q in sorted_qfuncs[r, t, :])
        six.print_(step_fmt.format(t, qtext))


 0: score 0.500 initialq 3.97 opt [[1 2 1 3 0 0]]
  step 0 qfunc [ 2.47 3.91 3.15 2.86 ]
  step 1 qfunc [ 3.12 3.50 3.95 2.97 ]
  step 2 qfunc [ 3.23 3.96 3.22 3.07 ]
  step 3 qfunc [ 3.94 3.94 3.94 3.99 ]
  step 4 qfunc [ 3.99 3.99 3.99 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
 1: score 0.755 initialq 3.96 opt [[2 1 3 1 3 3]]
  step 0 qfunc [ 2.74 3.14 3.92 2.54 ]
  step 1 qfunc [ 2.72 3.94 2.77 2.57 ]
  step 2 qfunc [ 3.19 3.95 3.07 3.95 ]
  step 3 qfunc [ 3.11 3.97 2.92 2.96 ]
  step 4 qfunc [ 3.97 3.96 3.30 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
 2: score 0.750 initialq 3.96 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.67 3.89 2.96 2.83 ]
  step 1 qfunc [ 3.16 3.35 3.94 3.09 ]
  step 2 qfunc [ 3.10 3.83 2.99 3.96 ]
  step 3 qfunc [ 2.91 3.97 2.99 2.95 ]
  step 4 qfunc [ 3.97 3.88 3.00 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.99 3.97 ]
 3: score 0.953 initialq 3.96 opt [[1 2 1 3 2 3]]
  step 0 qfunc [ 2.62 3.89 2.94 2.72 ]
  step 1 qfunc [ 2.91 3.34 3.93 2.80 ]
  step 2 qfunc [ 3.27 3.95 3.27 3.05 ]
  step 3 qfunc [ 3.29 3.96 3.23 3.97 ]
  step 4 qfunc [ 3.97 3.98 3.98 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.98 3.99 ]
 4: score 0.880 initialq 3.96 opt [[1 1 3 2 3 3]]
  step 0 qfunc [ 2.62 3.83 2.97 2.77 ]
  step 1 qfunc [ 3.11 3.90 3.21 2.98 ]
  step 2 qfunc [ 3.26 3.36 3.46 3.95 ]
  step 3 qfunc [ 3.48 3.50 3.96 3.96 ]
  step 4 qfunc [ 3.97 3.91 3.96 3.99 ]
  step 5 qfunc [ 3.98 3.99 3.99 3.99 ]
 5: score 0.977 initialq 3.96 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.63 3.89 3.22 2.47 ]
  step 1 qfunc [ 3.06 3.27 3.93 3.11 ]
  step 2 qfunc [ 3.33 3.95 3.24 3.49 ]
  step 3 qfunc [ 3.95 3.94 3.92 3.98 ]
  step 4 qfunc [ 3.98 3.99 3.95 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
 6: score 0.750 initialq 3.96 opt [[1 2 1 3 2 2]]
  step 0 qfunc [ 2.48 3.89 3.03 2.69 ]
  step 1 qfunc [ 3.10 3.27 3.93 2.88 ]
  step 2 qfunc [ 3.18 3.95 3.17 3.21 ]
  step 3 qfunc [ 3.86 3.95 3.40 3.97 ]
  step 4 qfunc [ 3.98 3.94 3.98 3.98 ]
  step 5 qfunc [ 3.99 3.96 3.99 3.99 ]
 7: score 0.880 initialq 3.96 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.82 3.78 2.96 2.68 ]
  step 1 qfunc [ 2.87 3.88 3.18 2.81 ]
  step 2 qfunc [ 3.16 3.07 3.35 3.93 ]
  step 3 qfunc [ 3.88 3.65 3.96 3.91 ]
  step 4 qfunc [ 3.63 3.64 3.98 3.96 ]
  step 5 qfunc [ 3.03 3.15 3.04 3.99 ]
 8: score 0.773 initialq 3.96 opt [[1 2 0 1 3 3]]
  step 0 qfunc [ 2.53 3.84 2.86 2.71 ]
  step 1 qfunc [ 3.00 3.13 3.91 3.02 ]
  step 2 qfunc [ 3.94 3.56 3.23 3.29 ]
  step 3 qfunc [ 3.18 3.96 3.25 3.36 ]
  step 4 qfunc [ 3.03 3.96 3.01 3.98 ]
  step 5 qfunc [ 3.97 3.72 3.98 3.99 ]
 9: score 0.750 initialq 3.96 opt [[1 2 3 1 3 3]]
  step 0 qfunc [ 2.77 3.86 2.93 2.52 ]
  step 1 qfunc [ 2.84 2.96 3.92 2.70 ]
  step 2 qfunc [ 3.30 3.55 3.28 3.95 ]
  step 3 qfunc [ 3.36 3.96 3.33 3.50 ]
  step 4 qfunc [ 3.53 3.93 3.08 3.97 ]
  step 5 qfunc [ 3.86 3.86 3.98 3.99 ]
10: score 0.750 initialq 3.96 opt [[1 2 1 3 0 2]]
  step 0 qfunc [ 2.56 3.90 3.18 2.72 ]
  step 1 qfunc [ 3.18 3.41 3.93 2.98 ]
  step 2 qfunc [ 3.37 3.95 3.35 3.55 ]
  step 3 qfunc [ 3.93 3.94 3.94 3.98 ]
  step 4 qfunc [ 3.98 3.97 3.98 3.98 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.97 ]
11: score 0.747 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.78 3.88 2.99 2.76 ]
  step 1 qfunc [ 2.91 3.14 3.92 2.99 ]
  step 2 qfunc [ 3.24 3.94 3.21 3.91 ]
  step 3 qfunc [ 3.95 3.91 3.92 3.98 ]
  step 4 qfunc [ 3.97 3.96 3.99 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
12: score 0.750 initialq 3.95 opt [[1 1 2 0 3 2]]
  step 0 qfunc [ 2.57 3.84 3.04 2.50 ]
  step 1 qfunc [ 2.77 3.89 3.14 2.91 ]
  step 2 qfunc [ 3.67 3.61 3.94 3.60 ]
  step 3 qfunc [ 3.97 3.94 3.95 3.90 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
13: score 0.833 initialq 3.95 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.82 2.90 2.54 ]
  step 1 qfunc [ 3.11 3.90 3.16 3.10 ]
  step 2 qfunc [ 3.23 3.38 3.05 3.94 ]
  step 3 qfunc [ 3.95 3.49 3.93 3.96 ]
  step 4 qfunc [ 3.97 3.21 3.97 3.97 ]
  step 5 qfunc [ 3.95 3.96 3.99 3.98 ]
14: score 0.924 initialq 3.95 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.78 3.72 2.93 2.74 ]
  step 1 qfunc [ 3.03 3.84 3.24 3.00 ]
  step 2 qfunc [ 3.32 3.32 3.92 3.50 ]
  step 3 qfunc [ 3.62 3.27 3.92 3.96 ]
  step 4 qfunc [ 3.30 3.23 3.98 3.56 ]
  step 5 qfunc [ 3.33 3.32 3.26 3.99 ]
15: score 0.839 initialq 3.95 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.57 3.87 2.97 2.64 ]
  step 1 qfunc [ 3.02 3.14 3.92 2.95 ]
  step 2 qfunc [ 3.38 3.93 3.37 3.94 ]
  step 3 qfunc [ 3.22 3.97 3.20 3.41 ]
  step 4 qfunc [ 3.95 3.97 3.00 3.98 ]
  step 5 qfunc [ 3.99 3.98 3.99 3.99 ]
16: score 0.690 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.68 3.87 2.82 2.71 ]
  step 1 qfunc [ 2.75 3.27 3.92 2.91 ]
  step 2 qfunc [ 3.31 3.94 3.29 3.46 ]
  step 3 qfunc [ 3.73 3.94 3.61 3.97 ]
  step 4 qfunc [ 3.95 3.98 3.95 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
17: score 0.747 initialq 3.95 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.65 3.89 3.02 2.88 ]
  step 1 qfunc [ 2.84 3.44 3.93 2.99 ]
  step 2 qfunc [ 3.36 3.94 3.37 3.94 ]
  step 3 qfunc [ 3.97 3.96 3.93 3.98 ]
  step 4 qfunc [ 3.98 3.99 3.98 3.99 ]
  step 5 qfunc [ 3.98 3.99 3.99 3.99 ]
18: score 0.750 initialq 3.95 opt [[1 2 1 1 3 2]]
  step 0 qfunc [ 2.67 3.85 2.93 2.76 ]
  step 1 qfunc [ 3.01 3.16 3.92 2.96 ]
  step 2 qfunc [ 3.21 3.94 3.17 3.06 ]
  step 3 qfunc [ 3.28 3.97 3.37 3.91 ]
  step 4 qfunc [ 3.62 3.97 3.07 3.99 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.99 ]
19: score 0.638 initialq 3.95 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.73 3.86 2.99 2.70 ]
  step 1 qfunc [ 3.01 3.15 3.91 2.88 ]
  step 2 qfunc [ 3.28 3.93 3.36 3.37 ]
  step 3 qfunc [ 3.51 3.96 3.95 3.86 ]
  step 4 qfunc [ 3.89 3.97 3.79 3.98 ]
  step 5 qfunc [ 3.99 3.96 3.99 3.99 ]
20: score 0.750 initialq 3.95 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.70 3.87 2.89 2.68 ]
  step 1 qfunc [ 2.91 3.17 3.91 2.87 ]
  step 2 qfunc [ 3.36 3.94 3.35 3.47 ]
  step 3 qfunc [ 3.95 3.94 3.83 3.96 ]
  step 4 qfunc [ 3.97 3.94 3.94 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
21: score 0.500 initialq 3.95 opt [[1 2 3 1 1 3]]
  step 0 qfunc [ 2.83 3.85 3.07 2.70 ]
  step 1 qfunc [ 2.91 3.15 3.92 3.04 ]
  step 2 qfunc [ 3.27 3.40 3.14 3.95 ]
  step 3 qfunc [ 3.23 3.96 3.36 3.40 ]
  step 4 qfunc [ 3.94 3.98 3.16 3.96 ]
  step 5 qfunc [ 3.00 3.00 3.00 3.98 ]
22: score 0.750 initialq 3.95 opt [[1 1 2 0 3 2]]
  step 0 qfunc [ 2.78 3.82 2.97 2.57 ]
  step 1 qfunc [ 3.05 3.89 3.25 3.16 ]
  step 2 qfunc [ 3.48 3.59 3.95 3.68 ]
  step 3 qfunc [ 3.96 3.48 3.96 3.91 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.98 ]
  step 5 qfunc [ 3.98 3.98 3.99 3.99 ]
23: score 0.750 initialq 3.95 opt [[1 2 3 1 3 3]]
  step 0 qfunc [ 2.70 3.86 3.02 2.73 ]
  step 1 qfunc [ 3.08 3.30 3.93 3.01 ]
  step 2 qfunc [ 3.28 3.52 3.07 3.95 ]
  step 3 qfunc [ 3.16 3.95 3.25 3.95 ]
  step 4 qfunc [ 3.06 3.95 3.41 3.97 ]
  step 5 qfunc [ 3.93 3.94 3.97 3.99 ]
24: score 0.534 initialq 3.95 opt [[2 1 1 0 1 3]]
  step 0 qfunc [ 2.66 3.15 3.87 2.62 ]
  step 1 qfunc [ 2.72 3.91 2.87 2.89 ]
  step 2 qfunc [ 3.15 3.93 3.06 3.38 ]
  step 3 qfunc [ 3.95 3.93 3.27 3.95 ]
  step 4 qfunc [ 3.23 3.98 3.00 3.96 ]
  step 5 qfunc [ 3.10 3.02 3.01 3.98 ]
25: score 0.750 initialq 3.95 opt [[2 1 1 3 1 3]]
  step 0 qfunc [ 2.73 3.02 3.85 2.79 ]
  step 1 qfunc [ 2.84 3.91 2.90 2.64 ]
  step 2 qfunc [ 3.26 3.94 3.21 3.30 ]
  step 3 qfunc [ 3.49 3.40 3.51 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.97 3.97 ]
  step 5 qfunc [ 3.02 3.05 3.05 3.98 ]
26: score 1.000 initialq 3.95 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.70 3.78 3.01 2.60 ]
  step 1 qfunc [ 3.08 3.89 3.30 3.03 ]
  step 2 qfunc [ 3.34 3.39 3.94 3.44 ]
  step 3 qfunc [ 3.87 3.32 3.83 3.97 ]
  step 4 qfunc [ 3.22 3.14 3.98 3.89 ]
  step 5 qfunc [ 3.13 3.16 3.10 3.99 ]
27: score 0.750 initialq 3.95 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.62 3.89 2.93 2.81 ]
  step 1 qfunc [ 2.97 2.84 3.94 3.00 ]
  step 2 qfunc [ 3.23 3.15 3.19 3.95 ]
  step 3 qfunc [ 3.25 3.96 3.25 3.29 ]
  step 4 qfunc [ 3.97 3.96 3.70 3.98 ]
  step 5 qfunc [ 3.97 3.93 3.99 3.98 ]
28: score 0.599 initialq 3.95 opt [[1 2 1 1 3 0]]
  step 0 qfunc [ 2.84 3.84 2.86 2.89 ]
  step 1 qfunc [ 2.91 3.06 3.90 3.10 ]
  step 2 qfunc [ 3.36 3.93 3.29 3.31 ]
  step 3 qfunc [ 3.94 3.96 3.26 3.13 ]
  step 4 qfunc [ 3.97 3.97 3.83 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.98 3.96 ]
29: score 0.750 initialq 3.95 opt [[1 2 1 3 3 0]]
  step 0 qfunc [ 2.74 3.86 2.96 2.66 ]
  step 1 qfunc [ 2.86 3.24 3.91 3.04 ]
  step 2 qfunc [ 3.11 3.93 3.04 3.13 ]
  step 3 qfunc [ 3.30 3.95 3.45 3.96 ]
  step 4 qfunc [ 3.97 3.95 3.48 3.98 ]
  step 5 qfunc [ 3.99 3.97 3.90 3.99 ]
30: score 0.510 initialq 3.95 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.73 3.87 2.95 2.68 ]
  step 1 qfunc [ 2.93 3.29 3.93 3.00 ]
  step 2 qfunc [ 3.20 3.57 3.17 3.95 ]
  step 3 qfunc [ 3.94 3.93 3.11 3.96 ]
  step 4 qfunc [ 2.36 3.96 2.51 2.35 ]
  step 5 qfunc [ 3.03 3.01 3.11 3.99 ]
31: score 0.500 initialq 3.95 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.67 3.83 2.96 2.72 ]
  step 1 qfunc [ 3.14 3.89 3.14 2.96 ]
  step 2 qfunc [ 3.39 3.58 3.42 3.94 ]
  step 3 qfunc [ 3.25 3.84 3.96 3.87 ]
  step 4 qfunc [ 3.91 3.98 3.74 3.97 ]
  step 5 qfunc [ 3.99 3.99 3.59 3.97 ]
32: score 0.526 initialq 3.95 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.70 3.84 2.90 2.76 ]
  step 1 qfunc [ 3.05 3.53 3.91 3.01 ]
  step 2 qfunc [ 3.28 3.76 3.20 3.94 ]
  step 3 qfunc [ 3.03 3.90 3.11 3.96 ]
  step 4 qfunc [ 2.63 3.96 2.74 2.59 ]
  step 5 qfunc [ 3.05 3.03 3.23 3.98 ]
33: score 0.750 initialq 3.95 opt [[2 1 1 3 3 3]]
  step 0 qfunc [ 2.32 2.96 3.88 2.53 ]
  step 1 qfunc [ 2.60 3.91 2.64 2.57 ]
  step 2 qfunc [ 3.11 3.93 3.12 3.34 ]
  step 3 qfunc [ 3.95 3.84 3.61 3.96 ]
  step 4 qfunc [ 3.96 3.96 3.90 3.98 ]
  step 5 qfunc [ 3.95 3.98 3.98 3.99 ]
34: score 0.964 initialq 3.95 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.81 2.73 2.72 ]
  step 1 qfunc [ 3.06 3.88 3.07 3.08 ]
  step 2 qfunc [ 3.44 3.40 3.72 3.93 ]
  step 3 qfunc [ 3.93 3.67 3.89 3.96 ]
  step 4 qfunc [ 3.97 3.93 3.97 3.97 ]
  step 5 qfunc [ 3.97 3.97 3.98 3.97 ]
35: score 0.750 initialq 3.95 opt [[1 2 1 3 1 3]]
  step 0 qfunc [ 2.70 3.82 3.01 2.71 ]
  step 1 qfunc [ 3.03 3.18 3.90 2.79 ]
  step 2 qfunc [ 3.13 3.93 3.10 3.22 ]
  step 3 qfunc [ 3.68 3.66 3.67 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.95 3.96 ]
  step 5 qfunc [ 3.15 3.08 3.08 3.99 ]
36: score 0.500 initialq 3.94 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.70 3.84 2.97 2.61 ]
  step 1 qfunc [ 3.04 3.21 3.91 2.96 ]
  step 2 qfunc [ 3.14 3.93 3.18 3.12 ]
  step 3 qfunc [ 3.30 3.96 3.19 3.86 ]
  step 4 qfunc [ 3.60 3.95 3.91 3.98 ]
  step 5 qfunc [ 3.98 3.99 3.97 3.97 ]
37: score 0.750 initialq 3.94 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.63 3.77 2.90 2.73 ]
  step 1 qfunc [ 2.89 2.87 2.99 3.92 ]
  step 2 qfunc [ 2.77 3.94 2.99 2.77 ]
  step 3 qfunc [ 3.25 3.09 2.99 3.96 ]
  step 4 qfunc [ 3.96 3.08 3.96 3.98 ]
  step 5 qfunc [ 2.99 2.96 3.98 2.99 ]
38: score 0.500 initialq 3.94 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.72 3.82 3.02 2.69 ]
  step 1 qfunc [ 2.94 3.18 3.87 3.08 ]
  step 2 qfunc [ 3.34 3.56 3.17 3.90 ]
  step 3 qfunc [ 3.20 3.90 3.90 3.95 ]
  step 4 qfunc [ 2.84 3.96 2.80 2.80 ]
  step 5 qfunc [ 3.02 3.00 3.00 3.99 ]
39: score 0.740 initialq 3.94 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.66 3.86 2.96 2.75 ]
  step 1 qfunc [ 2.88 3.20 3.90 3.02 ]
  step 2 qfunc [ 3.00 3.92 3.07 3.04 ]
  step 3 qfunc [ 3.82 3.94 3.70 3.95 ]
  step 4 qfunc [ 3.91 3.96 3.80 3.97 ]
  step 5 qfunc [ 3.98 3.98 3.97 3.97 ]
40: score 0.750 initialq 3.94 opt [[1 2 1 1 0 3]]
  step 0 qfunc [ 2.83 3.85 2.92 2.67 ]
  step 1 qfunc [ 2.97 3.19 3.90 2.88 ]
  step 2 qfunc [ 3.16 3.92 3.06 3.09 ]
  step 3 qfunc [ 3.63 3.95 3.33 3.95 ]
  step 4 qfunc [ 3.97 3.95 3.96 3.96 ]
  step 5 qfunc [ 3.05 3.00 3.09 3.98 ]
41: score 0.750 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.74 3.72 2.84 2.72 ]
  step 1 qfunc [ 2.88 3.88 2.98 2.92 ]
  step 2 qfunc [ 2.93 2.84 2.97 3.94 ]
  step 3 qfunc [ 3.44 3.29 2.32 3.95 ]
  step 4 qfunc [ 3.91 3.86 3.19 3.98 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
42: score 0.599 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.75 2.86 2.78 ]
  step 1 qfunc [ 2.88 3.89 2.99 2.90 ]
  step 2 qfunc [ 3.01 2.83 2.99 3.94 ]
  step 3 qfunc [ 3.58 3.61 2.70 3.96 ]
  step 4 qfunc [ 3.98 3.96 3.33 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
43: score 0.750 initialq 3.94 opt [[2 1 1 3 3 3]]
  step 0 qfunc [ 2.61 3.09 3.86 2.74 ]
  step 1 qfunc [ 2.80 3.91 2.73 2.88 ]
  step 2 qfunc [ 3.33 3.93 3.25 3.41 ]
  step 3 qfunc [ 3.46 3.95 3.83 3.96 ]
  step 4 qfunc [ 3.95 3.94 3.95 3.98 ]
  step 5 qfunc [ 3.98 3.96 3.99 3.99 ]
44: score 0.750 initialq 3.94 opt [[2 1 1 3 0 3]]
  step 0 qfunc [ 2.84 3.05 3.84 2.63 ]
  step 1 qfunc [ 2.76 3.90 2.85 2.82 ]
  step 2 qfunc [ 3.29 3.92 3.21 3.24 ]
  step 3 qfunc [ 3.92 3.89 3.33 3.96 ]
  step 4 qfunc [ 3.97 3.96 3.96 3.96 ]
  step 5 qfunc [ 3.49 3.50 3.97 3.98 ]
45: score 0.591 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.76 3.78 2.88 2.73 ]
  step 1 qfunc [ 2.94 3.89 2.99 2.96 ]
  step 2 qfunc [ 3.05 3.04 2.99 3.93 ]
  step 3 qfunc [ 3.93 3.77 2.98 3.95 ]
  step 4 qfunc [ 3.97 3.97 3.90 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
46: score 0.911 initialq 3.94 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.73 3.80 2.97 2.75 ]
  step 1 qfunc [ 3.15 3.88 3.17 2.95 ]
  step 2 qfunc [ 3.28 3.52 3.73 3.94 ]
  step 3 qfunc [ 3.95 3.93 3.88 3.96 ]
  step 4 qfunc [ 3.97 3.97 3.98 3.97 ]
  step 5 qfunc [ 3.99 3.99 3.99 3.97 ]
47: score 0.997 initialq 3.94 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.56 3.81 2.74 2.62 ]
  step 1 qfunc [ 3.09 3.89 3.20 2.91 ]
  step 2 qfunc [ 3.29 3.30 3.43 3.94 ]
  step 3 qfunc [ 3.65 3.61 3.96 3.78 ]
  step 4 qfunc [ 3.92 3.96 3.98 3.95 ]
  step 5 qfunc [ 3.08 3.09 3.05 3.98 ]
48: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.68 3.81 2.90 2.73 ]
  step 1 qfunc [ 2.98 3.89 3.10 2.96 ]
  step 2 qfunc [ 3.08 3.29 3.07 3.93 ]
  step 3 qfunc [ 3.68 3.96 3.84 3.89 ]
  step 4 qfunc [ 3.95 3.53 3.97 3.97 ]
  step 5 qfunc [ 3.97 3.91 3.98 3.99 ]
49: score 0.750 initialq 3.94 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.78 3.78 2.87 2.71 ]
  step 1 qfunc [ 2.94 3.88 3.06 2.86 ]
  step 2 qfunc [ 3.02 3.33 3.09 3.94 ]
  step 3 qfunc [ 3.33 3.63 3.51 3.95 ]
  step 4 qfunc [ 3.94 3.86 3.96 3.98 ]
  step 5 qfunc [ 2.99 2.98 3.98 2.99 ]
50: score 1.000 initialq 3.94 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.43 3.82 2.93 2.72 ]
  step 1 qfunc [ 2.96 3.89 3.25 3.09 ]
  step 2 qfunc [ 3.07 3.43 3.30 3.93 ]
  step 3 qfunc [ 3.95 3.93 3.95 3.93 ]
  step 4 qfunc [ 3.95 3.96 3.97 3.96 ]
  step 5 qfunc [ 3.98 3.40 3.96 3.99 ]
51: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.59 3.81 2.77 2.63 ]
  step 1 qfunc [ 2.85 3.89 2.91 2.80 ]
  step 2 qfunc [ 3.13 3.22 3.08 3.93 ]
  step 3 qfunc [ 3.72 3.95 3.14 3.94 ]
  step 4 qfunc [ 2.81 3.00 3.97 3.97 ]
  step 5 qfunc [ 3.07 3.34 3.04 3.98 ]
52: score 0.750 initialq 3.94 opt [[1 2 1 3 3 1]]
  step 0 qfunc [ 2.77 3.85 2.80 2.65 ]
  step 1 qfunc [ 3.02 3.23 3.90 2.88 ]
  step 2 qfunc [ 3.16 3.93 3.19 3.31 ]
  step 3 qfunc [ 3.44 3.94 3.42 3.95 ]
  step 4 qfunc [ 3.96 3.94 3.96 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.98 3.89 ]
53: score 0.500 initialq 3.94 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.77 3.86 2.92 2.68 ]
  step 1 qfunc [ 2.92 3.35 3.91 2.90 ]
  step 2 qfunc [ 3.32 3.57 3.27 3.93 ]
  step 3 qfunc [ 3.33 3.93 3.26 3.95 ]
  step 4 qfunc [ 2.90 3.96 2.79 2.78 ]
  step 5 qfunc [ 3.15 3.09 3.04 3.98 ]
54: score 0.500 initialq 3.94 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.60 3.83 2.94 2.74 ]
  step 1 qfunc [ 2.94 3.25 3.89 3.07 ]
  step 2 qfunc [ 3.31 3.91 3.37 3.27 ]
  step 3 qfunc [ 3.92 3.91 3.82 3.95 ]
  step 4 qfunc [ 3.97 3.81 3.91 3.97 ]
  step 5 qfunc [ 3.08 3.57 3.98 3.43 ]
55: score 0.742 initialq 3.94 opt [[1 2 1 3 3 3]]
  step 0 qfunc [ 2.73 3.84 3.07 2.62 ]
  step 1 qfunc [ 3.11 3.45 3.89 2.92 ]
  step 2 qfunc [ 3.22 3.91 3.29 3.45 ]
  step 3 qfunc [ 3.93 3.92 3.94 3.94 ]
  step 4 qfunc [ 3.95 3.95 3.94 3.96 ]
  step 5 qfunc [ 3.94 3.97 3.95 3.98 ]
56: score 0.714 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.73 3.82 2.79 2.70 ]
  step 1 qfunc [ 2.95 3.90 2.92 3.02 ]
  step 2 qfunc [ 3.25 3.21 3.18 3.94 ]
  step 3 qfunc [ 3.77 3.57 3.41 3.96 ]
  step 4 qfunc [ 3.98 3.95 3.96 3.97 ]
  step 5 qfunc [ 2.99 2.96 3.99 2.99 ]
57: score 0.750 initialq 3.94 opt [[1 1 3 1 2 3]]
  step 0 qfunc [ 2.78 3.80 2.94 2.75 ]
  step 1 qfunc [ 3.08 3.88 3.07 3.01 ]
  step 2 qfunc [ 3.43 3.42 3.41 3.93 ]
  step 3 qfunc [ 3.92 3.95 3.86 3.94 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.96 ]
  step 5 qfunc [ 3.96 3.95 3.97 3.98 ]
58: score 0.750 initialq 3.94 opt [[1 1 3 3 1 2]]
  step 0 qfunc [ 2.67 3.79 2.84 2.72 ]
  step 1 qfunc [ 2.92 3.88 2.99 2.89 ]
  step 2 qfunc [ 3.06 2.99 2.99 3.93 ]
  step 3 qfunc [ 3.74 3.67 2.96 3.95 ]
  step 4 qfunc [ 3.94 3.98 3.90 3.90 ]
  step 5 qfunc [ 2.95 2.92 3.98 2.99 ]
59: score 0.615 initialq 3.94 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.68 3.71 2.87 2.71 ]
  step 1 qfunc [ 2.90 3.86 2.98 2.92 ]
  step 2 qfunc [ 2.98 3.00 2.99 3.93 ]
  step 3 qfunc [ 3.71 3.45 2.77 3.95 ]
  step 4 qfunc [ 3.97 3.96 3.94 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
60: score 0.750 initialq 3.94 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.77 3.78 2.84 2.67 ]
  step 1 qfunc [ 2.83 3.88 2.99 2.93 ]
  step 2 qfunc [ 3.14 3.08 2.99 3.93 ]
  step 3 qfunc [ 3.95 3.51 2.89 3.73 ]
  step 4 qfunc [ 3.96 3.89 3.95 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
61: score 0.682 initialq 3.94 opt [[1 3 1 3 2 3]]
  step 0 qfunc [ 2.67 3.76 2.87 2.72 ]
  step 1 qfunc [ 2.97 3.07 3.09 3.90 ]
  step 2 qfunc [ 2.77 3.92 3.02 2.76 ]
  step 3 qfunc [ 3.37 3.41 3.52 3.95 ]
  step 4 qfunc [ 3.82 3.87 3.97 3.93 ]
  step 5 qfunc [ 3.72 3.65 3.54 3.98 ]
62: score 0.987 initialq 3.94 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.70 3.83 3.00 2.58 ]
  step 1 qfunc [ 2.89 3.11 3.90 2.95 ]
  step 2 qfunc [ 3.16 3.92 3.04 3.03 ]
  step 3 qfunc [ 3.35 3.86 3.28 3.95 ]
  step 4 qfunc [ 2.76 3.90 3.93 3.96 ]
  step 5 qfunc [ 3.22 3.96 3.98 3.74 ]
63: score 0.500 initialq 3.94 opt [[1 1 2 3 2 3]]
  step 0 qfunc [ 2.57 3.80 2.74 2.45 ]
  step 1 qfunc [ 2.94 3.88 2.79 2.86 ]
  step 2 qfunc [ 3.41 3.23 3.93 3.75 ]
  step 3 qfunc [ 3.61 3.75 3.27 3.95 ]
  step 4 qfunc [ 3.93 3.96 3.97 3.78 ]
  step 5 qfunc [ 2.92 2.69 3.03 3.98 ]
64: score 0.500 initialq 3.94 opt [[1 3 1 1 3 2]]
  step 0 qfunc [ 2.74 3.78 2.88 2.70 ]
  step 1 qfunc [ 2.87 2.97 2.99 3.91 ]
  step 2 qfunc [ 2.83 3.93 2.99 2.71 ]
  step 3 qfunc [ 3.28 3.97 2.99 3.94 ]
  step 4 qfunc [ 2.76 2.70 2.99 3.98 ]
  step 5 qfunc [ 2.99 2.99 3.99 2.99 ]
65: score 0.542 initialq 3.93 opt [[2 1 3 1 3 2]]
  step 0 qfunc [ 2.73 3.11 3.84 2.76 ]
  step 1 qfunc [ 2.82 3.90 2.85 2.91 ]
  step 2 qfunc [ 3.48 3.51 3.28 3.93 ]
  step 3 qfunc [ 3.40 3.94 3.29 2.94 ]
  step 4 qfunc [ 3.78 3.96 3.86 3.96 ]
  step 5 qfunc [ 3.48 3.91 3.98 3.81 ]
66: score 0.750 initialq 3.93 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.77 3.78 2.88 2.67 ]
  step 1 qfunc [ 3.04 3.87 3.18 2.93 ]
  step 2 qfunc [ 3.17 3.16 3.77 3.93 ]
  step 3 qfunc [ 3.95 3.43 3.87 3.95 ]
  step 4 qfunc [ 3.96 3.62 3.97 3.96 ]
  step 5 qfunc [ 3.97 3.93 3.98 3.99 ]
67: score 0.750 initialq 3.93 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.63 3.78 2.93 2.78 ]
  step 1 qfunc [ 2.87 3.86 3.06 2.91 ]
  step 2 qfunc [ 3.30 3.34 3.38 3.92 ]
  step 3 qfunc [ 3.61 3.84 3.90 3.95 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.93 ]
  step 5 qfunc [ 3.97 3.95 3.92 3.98 ]
68: score 0.503 initialq 3.93 opt [[1 1 3 0 0 2]]
  step 0 qfunc [ 2.69 3.76 2.85 2.67 ]
  step 1 qfunc [ 2.94 3.87 2.99 2.83 ]
  step 2 qfunc [ 2.84 3.22 2.99 3.93 ]
  step 3 qfunc [ 3.95 3.51 3.46 3.55 ]
  step 4 qfunc [ 3.97 3.94 3.80 3.95 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
69: score 0.500 initialq 3.93 opt [[1 1 3 0 1 2]]
  step 0 qfunc [ 2.72 3.79 2.82 2.79 ]
  step 1 qfunc [ 2.84 3.88 2.92 2.95 ]
  step 2 qfunc [ 3.28 3.13 2.75 3.92 ]
  step 3 qfunc [ 3.94 3.86 3.47 3.94 ]
  step 4 qfunc [ 3.95 3.96 3.94 3.96 ]
  step 5 qfunc [ 2.66 2.43 3.98 2.99 ]
70: score 0.526 initialq 3.93 opt [[1 2 1 1 3 1]]
  step 0 qfunc [ 2.78 3.85 2.90 2.76 ]
  step 1 qfunc [ 2.80 2.79 3.90 2.83 ]
  step 2 qfunc [ 3.18 3.92 3.10 3.06 ]
  step 3 qfunc [ 3.30 3.95 3.34 3.77 ]
  step 4 qfunc [ 3.32 3.95 3.06 3.97 ]
  step 5 qfunc [ 3.97 3.98 3.97 3.97 ]
71: score 0.750 initialq 3.93 opt [[1 1 3 1 2 1]]
  step 0 qfunc [ 2.73 3.79 2.84 2.54 ]
  step 1 qfunc [ 3.00 3.86 3.14 2.94 ]
  step 2 qfunc [ 3.07 3.34 3.17 3.91 ]
  step 3 qfunc [ 3.89 3.94 3.89 3.93 ]
  step 4 qfunc [ 2.95 3.58 3.96 3.80 ]
  step 5 qfunc [ 3.17 3.98 3.82 3.94 ]
72: score 0.500 initialq 3.93 opt [[1 1 3 1 0 2]]
  step 0 qfunc [ 2.60 3.75 2.76 2.62 ]
  step 1 qfunc [ 3.08 3.84 2.96 2.76 ]
  step 2 qfunc [ 3.61 3.38 3.45 3.90 ]
  step 3 qfunc [ 3.89 3.93 3.77 3.77 ]
  step 4 qfunc [ 3.95 3.75 3.91 3.32 ]
  step 5 qfunc [ 2.99 3.00 3.96 3.00 ]
73: score 0.742 initialq 3.93 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.74 3.76 3.01 2.59 ]
  step 1 qfunc [ 3.01 3.86 3.10 2.95 ]
  step 2 qfunc [ 3.30 3.23 3.19 3.92 ]
  step 3 qfunc [ 3.95 3.79 3.68 3.91 ]
  step 4 qfunc [ 3.96 3.93 3.96 3.96 ]
  step 5 qfunc [ 2.98 2.99 3.97 2.99 ]
74: score 0.703 initialq 3.93 opt [[1 3 2 3 1 3]]
  step 0 qfunc [ 2.71 3.83 2.85 2.69 ]
  step 1 qfunc [ 2.99 2.88 3.01 3.88 ]
  step 2 qfunc [ 2.81 3.27 3.90 2.85 ]
  step 3 qfunc [ 3.39 3.90 3.23 3.94 ]
  step 4 qfunc [ 2.99 3.95 3.00 2.99 ]
  step 5 qfunc [ 2.99 3.00 3.00 3.97 ]
75: score 0.602 initialq 3.93 opt [[1 1 3 2 2 3]]
  step 0 qfunc [ 2.70 3.80 2.98 2.72 ]
  step 1 qfunc [ 3.04 3.87 3.16 2.89 ]
  step 2 qfunc [ 3.54 3.45 3.45 3.92 ]
  step 3 qfunc [ 3.93 3.89 3.94 3.93 ]
  step 4 qfunc [ 3.94 3.95 3.97 3.74 ]
  step 5 qfunc [ 3.28 3.43 3.04 3.99 ]
76: score 0.586 initialq 3.93 opt [[1 3 1 3 0 2]]
  step 0 qfunc [ 2.71 3.77 2.84 2.73 ]
  step 1 qfunc [ 2.89 2.95 2.98 3.91 ]
  step 2 qfunc [ 2.82 3.93 2.99 2.85 ]
  step 3 qfunc [ 3.26 3.16 2.95 3.96 ]
  step 4 qfunc [ 3.97 3.61 3.91 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
77: score 0.750 initialq 3.93 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.69 3.75 2.70 2.69 ]
  step 1 qfunc [ 2.87 3.88 2.96 2.85 ]
  step 2 qfunc [ 2.95 2.91 2.99 3.93 ]
  step 3 qfunc [ 3.19 3.13 3.35 3.94 ]
  step 4 qfunc [ 3.63 2.90 3.93 3.96 ]
  step 5 qfunc [ 2.99 2.98 3.97 2.99 ]
78: score 0.500 initialq 3.93 opt [[1 1 2 1 2 3]]
  step 0 qfunc [ 2.71 3.80 2.91 2.67 ]
  step 1 qfunc [ 2.92 3.87 3.17 2.77 ]
  step 2 qfunc [ 3.44 3.33 3.92 3.48 ]
  step 3 qfunc [ 3.87 3.94 3.55 3.78 ]
  step 4 qfunc [ 3.96 3.96 3.97 3.90 ]
  step 5 qfunc [ 3.06 3.04 3.14 3.98 ]
79: score 0.750 initialq 3.93 opt [[1 2 1 1 3 3]]
  step 0 qfunc [ 2.80 3.83 2.95 2.73 ]
  step 1 qfunc [ 2.99 3.14 3.90 2.98 ]
  step 2 qfunc [ 3.17 3.92 3.24 2.98 ]
  step 3 qfunc [ 3.14 3.94 3.07 3.93 ]
  step 4 qfunc [ 2.85 3.77 2.90 3.97 ]
  step 5 qfunc [ 3.92 3.96 3.98 3.99 ]
80: score 0.612 initialq 3.93 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.76 2.87 2.75 ]
  step 1 qfunc [ 2.86 3.88 2.99 2.86 ]
  step 2 qfunc [ 2.96 2.98 2.99 3.94 ]
  step 3 qfunc [ 3.53 3.43 2.79 3.96 ]
  step 4 qfunc [ 3.97 3.96 3.02 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
81: score 0.500 initialq 3.93 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.68 3.79 2.91 2.73 ]
  step 1 qfunc [ 2.98 3.86 3.34 2.87 ]
  step 2 qfunc [ 3.54 3.40 3.69 3.91 ]
  step 3 qfunc [ 3.94 3.91 3.90 3.88 ]
  step 4 qfunc [ 3.57 3.95 3.95 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
82: score 0.753 initialq 3.92 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.74 3.74 2.88 2.74 ]
  step 1 qfunc [ 2.87 2.97 2.99 3.90 ]
  step 2 qfunc [ 2.78 3.93 2.99 2.73 ]
  step 3 qfunc [ 3.21 3.31 2.99 3.95 ]
  step 4 qfunc [ 3.94 3.14 3.96 3.96 ]
  step 5 qfunc [ 2.99 2.96 3.97 2.99 ]
83: score 0.750 initialq 3.92 opt [[1 2 1 3 2 2]]
  step 0 qfunc [ 2.58 3.85 2.91 2.73 ]
  step 1 qfunc [ 3.07 3.32 3.90 2.95 ]
  step 2 qfunc [ 3.33 3.93 3.02 3.48 ]
  step 3 qfunc [ 3.66 3.69 3.44 3.95 ]
  step 4 qfunc [ 3.83 3.96 3.97 3.87 ]
  step 5 qfunc [ 3.95 3.97 3.98 3.96 ]
84: score 0.773 initialq 3.92 opt [[1 1 3 3 2 2]]
  step 0 qfunc [ 2.74 3.76 2.64 2.76 ]
  step 1 qfunc [ 3.00 3.84 3.14 3.08 ]
  step 2 qfunc [ 3.26 3.20 3.57 3.90 ]
  step 3 qfunc [ 3.90 3.55 3.51 3.91 ]
  step 4 qfunc [ 3.91 3.91 3.94 3.88 ]
  step 5 qfunc [ 3.91 3.58 3.97 3.92 ]
85: score 0.714 initialq 3.92 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.63 3.81 2.96 2.69 ]
  step 1 qfunc [ 2.95 3.27 3.88 3.01 ]
  step 2 qfunc [ 3.07 3.55 3.09 3.91 ]
  step 3 qfunc [ 3.28 3.92 3.12 3.32 ]
  step 4 qfunc [ 3.80 3.68 3.63 3.96 ]
  step 5 qfunc [ 3.91 3.66 3.97 3.65 ]
86: score 0.719 initialq 3.92 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.76 3.60 2.91 2.76 ]
  step 1 qfunc [ 2.90 3.87 2.98 2.89 ]
  step 2 qfunc [ 2.93 2.91 2.87 3.94 ]
  step 3 qfunc [ 3.24 3.25 2.48 3.96 ]
  step 4 qfunc [ 3.86 3.10 3.14 3.97 ]
  step 5 qfunc [ 2.98 2.96 3.98 2.99 ]
87: score 0.750 initialq 3.92 opt [[1 3 1 3 3 2]]
  step 0 qfunc [ 2.70 3.78 2.85 2.66 ]
  step 1 qfunc [ 2.87 2.87 2.98 3.91 ]
  step 2 qfunc [ 2.82 3.93 2.98 2.76 ]
  step 3 qfunc [ 3.24 3.05 2.96 3.95 ]
  step 4 qfunc [ 3.86 3.62 3.90 3.97 ]
  step 5 qfunc [ 2.99 2.98 3.98 2.99 ]
88: score 0.766 initialq 3.92 opt [[1 2 0 1 3 3]]
  step 0 qfunc [ 2.70 3.76 2.90 2.73 ]
  step 1 qfunc [ 3.04 3.14 3.86 2.99 ]
  step 2 qfunc [ 3.90 3.29 3.21 3.01 ]
  step 3 qfunc [ 3.01 3.92 3.07 3.06 ]
  step 4 qfunc [ 3.88 3.93 3.91 3.95 ]
  step 5 qfunc [ 3.24 3.31 3.13 3.96 ]
89: score 0.747 initialq 3.92 opt [[1 1 3 0 3 2]]
  step 0 qfunc [ 2.75 3.70 2.88 2.69 ]
  step 1 qfunc [ 2.90 3.87 2.99 2.89 ]
  step 2 qfunc [ 3.02 3.00 2.92 3.93 ]
  step 3 qfunc [ 3.95 3.20 2.51 3.35 ]
  step 4 qfunc [ 3.26 3.16 2.95 3.96 ]
  step 5 qfunc [ 2.99 2.99 3.96 2.99 ]
90: score 0.596 initialq 3.92 opt [[1 2 3 3 1 3]]
  step 0 qfunc [ 2.69 3.79 2.85 2.62 ]
  step 1 qfunc [ 2.85 3.16 3.88 3.04 ]
  step 2 qfunc [ 3.17 3.30 3.11 3.90 ]
  step 3 qfunc [ 3.21 3.86 3.27 3.93 ]
  step 4 qfunc [ 2.96 3.94 2.98 2.95 ]
  step 5 qfunc [ 2.99 3.01 3.00 3.96 ]
91: score 0.750 initialq 3.92 opt [[2 1 1 1 2 3]]
  step 0 qfunc [ 2.69 3.04 3.80 2.58 ]
  step 1 qfunc [ 2.66 3.86 2.86 2.76 ]
  step 2 qfunc [ 3.18 3.88 3.20 3.23 ]
  step 3 qfunc [ 3.40 3.92 3.91 3.38 ]
  step 4 qfunc [ 3.95 3.93 3.95 3.72 ]
  step 5 qfunc [ 3.02 2.99 3.05 3.97 ]
92: score 0.500 initialq 3.92 opt [[1 2 1 1 0 1]]
  step 0 qfunc [ 2.75 3.82 2.83 2.87 ]
  step 1 qfunc [ 2.82 3.18 3.88 3.03 ]
  step 2 qfunc [ 2.99 3.90 3.14 3.12 ]
  step 3 qfunc [ 3.12 3.93 3.37 3.83 ]
  step 4 qfunc [ 3.96 3.94 3.93 3.91 ]
  step 5 qfunc [ 3.96 3.97 3.95 3.42 ]
93: score 0.750 initialq 3.91 opt [[1 1 3 3 2 3]]
  step 0 qfunc [ 2.80 3.80 2.85 2.80 ]
  step 1 qfunc [ 2.94 3.86 3.01 2.93 ]
  step 2 qfunc [ 3.58 3.58 3.60 3.93 ]
  step 3 qfunc [ 3.93 3.94 3.93 3.95 ]
  step 4 qfunc [ 3.96 3.95 3.98 3.90 ]
  step 5 qfunc [ 3.98 3.98 3.96 3.99 ]
94: score 0.669 initialq 3.91 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.67 3.76 2.79 2.60 ]
  step 1 qfunc [ 2.76 3.84 2.97 2.91 ]
  step 2 qfunc [ 3.27 3.01 2.94 3.94 ]
  step 3 qfunc [ 3.95 3.80 3.76 3.96 ]
  step 4 qfunc [ 3.97 3.95 3.97 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.99 2.99 ]
95: score 0.750 initialq 3.91 opt [[1 2 3 1 3 2]]
  step 0 qfunc [ 2.82 3.81 2.92 2.62 ]
  step 1 qfunc [ 3.05 3.34 3.88 2.89 ]
  step 2 qfunc [ 3.25 3.48 3.30 3.90 ]
  step 3 qfunc [ 3.16 3.91 3.35 3.13 ]
  step 4 qfunc [ 3.76 3.63 3.93 3.95 ]
  step 5 qfunc [ 3.77 3.95 3.97 3.95 ]
96: score 0.727 initialq 3.90 opt [[1 1 3 3 3 2]]
  step 0 qfunc [ 2.66 3.76 2.80 2.72 ]
  step 1 qfunc [ 2.85 3.85 2.98 2.96 ]
  step 2 qfunc [ 3.10 3.12 2.95 3.93 ]
  step 3 qfunc [ 3.93 3.56 3.37 3.95 ]
  step 4 qfunc [ 3.97 3.90 3.94 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
97: score 0.750 initialq 3.90 opt [[1 1 3 3 0 2]]
  step 0 qfunc [ 2.76 3.74 2.86 2.70 ]
  step 1 qfunc [ 2.89 3.86 2.98 2.85 ]
  step 2 qfunc [ 3.05 3.00 2.87 3.94 ]
  step 3 qfunc [ 3.47 3.20 2.52 3.96 ]
  step 4 qfunc [ 3.97 3.42 3.89 3.97 ]
  step 5 qfunc [ 2.99 2.99 3.98 2.99 ]
98: score 0.706 initialq 3.84 opt [[1 3 1 1 3 2]]
  step 0 qfunc [ 2.74 3.55 2.91 2.80 ]
  step 1 qfunc [ 2.86 2.81 2.95 3.83 ]
  step 2 qfunc [ 2.75 3.87 2.97 2.84 ]
  step 3 qfunc [ 2.91 3.89 2.91 3.40 ]
  step 4 qfunc [ 2.74 2.71 2.99 3.91 ]
  step 5 qfunc [ 2.99 2.98 3.92 2.99 ]
99: score 0.539 initialq 3.83 opt [[1 2 1 1 1 3]]
  step 0 qfunc [ 2.70 3.72 2.91 2.73 ]
  step 1 qfunc [ 2.88 3.07 3.79 2.95 ]
  step 2 qfunc [ 3.05 3.82 3.04 3.07 ]
  step 3 qfunc [ 3.63 3.95 3.42 3.60 ]
  step 4 qfunc [ 3.97 3.97 3.95 3.95 ]
  step 5 qfunc [ 3.04 3.01 3.07 3.98 ]

In [2]:
'''
Checking correlations

Prints, for the dropout8 runB results ranked by descending initial q-value,
each row's score, initial q-value, `opts` entry and the q-function values of
steps 0..5.
'''
data11 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/stats-runB.npz')

data21 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/mcts-rtype1-rollouts3000-trajectories100-real1-runB.npz')

data51 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/initialq-rtype1-rollouts100000-runB.npz')

data61 = np.load('experiments/test2_model_small-dropout8-shuffle0-data-test2-n100000-l5-random.pickle/optpolicy-rtype1-rollouts20000-runB.npz')

# `vloss` is read but not used further in this cell.
vloss = data11['vloss']
# Column 0 of the scores / q-values; index 0 along axis 1 of the `qs` array.
scores, initialq = data21['scores'][:, 0], data51['qvals'][:, 0]
opts, qfuncs = data61['opts'], data61['qs'][:, 0, :, :]

# Row order: descending initial q-value (ascending argsort, reversed).
sorted_score_ix = np.argsort(initialq)[::-1]
sorted_scores = scores[sorted_score_ix]
sorted_initialq = initialq[sorted_score_ix]
sorted_opts = opts[sorted_score_ix, :]
sorted_qfuncs = qfuncs[sorted_score_ix, :, :]

header_template = '{:2d}: score {:.3f} initialq {:.2f} opt {}'
step_template = '  step {} qfunc [ {} ]'

for r in six.moves.range(scores.shape[0]):
    header = header_template.format(r, sorted_scores[r], sorted_initialq[r], sorted_opts[r, :])
    six.print_(header)
    for t in six.moves.range(6):
        # Two-decimal q-values of step t, separated by single spaces.
        formatted = ' '.join('{:.2f}'.format(q) for q in sorted_qfuncs[r, t, :])
        six.print_(step_template.format(t, formatted))


 0: score 0.750 initialq 3.46 opt [[2 1 1 3 3 0]]
  step 0 qfunc [ 2.24 2.53 3.41 2.27 ]
  step 1 qfunc [ 2.33 3.44 2.34 2.38 ]
  step 2 qfunc [ 2.54 3.46 2.42 2.52 ]
  step 3 qfunc [ 2.79 3.15 2.44 3.50 ]
  step 4 qfunc [ 3.51 3.45 2.45 3.51 ]
  step 5 qfunc [ 3.52 3.31 3.52 3.51 ]
 1: score 0.500 initialq 3.45 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.25 3.39 2.33 2.24 ]
  step 1 qfunc [ 2.46 3.43 2.62 2.50 ]
  step 2 qfunc [ 2.69 2.82 2.69 3.48 ]
  step 3 qfunc [ 3.38 3.28 3.50 3.47 ]
  step 4 qfunc [ 3.51 3.51 3.49 3.50 ]
  step 5 qfunc [ 3.52 3.52 3.52 3.21 ]
 2: score 0.539 initialq 3.45 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.20 3.38 2.41 2.19 ]
  step 1 qfunc [ 2.27 3.42 2.61 2.40 ]
  step 2 qfunc [ 2.57 2.69 2.54 3.47 ]
  step 3 qfunc [ 3.27 3.35 3.49 3.23 ]
  step 4 qfunc [ 3.48 3.50 3.00 3.46 ]
  step 5 qfunc [ 3.52 3.45 3.51 3.29 ]
 3: score 0.500 initialq 3.44 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.24 3.37 2.29 2.23 ]
  step 1 qfunc [ 2.40 3.42 2.48 2.39 ]
  step 2 qfunc [ 2.39 2.52 2.58 3.48 ]
  step 3 qfunc [ 3.10 3.19 3.49 2.77 ]
  step 4 qfunc [ 3.50 3.50 3.15 3.40 ]
  step 5 qfunc [ 3.51 3.51 3.29 3.06 ]
 4: score 0.750 initialq 3.42 opt [[1 1 3 2 1 2]]
  step 0 qfunc [ 2.21 3.36 2.35 2.26 ]
  step 1 qfunc [ 2.38 3.40 2.49 2.31 ]
  step 2 qfunc [ 2.39 2.67 2.86 3.47 ]
  step 3 qfunc [ 3.03 3.27 3.49 3.29 ]
  step 4 qfunc [ 3.44 3.50 3.41 3.28 ]
  step 5 qfunc [ 3.51 3.48 3.51 2.62 ]
 5: score 0.500 initialq 3.41 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.27 3.31 2.31 2.08 ]
  step 1 qfunc [ 2.45 3.37 2.58 2.40 ]
  step 2 qfunc [ 2.62 2.61 3.12 3.45 ]
  step 3 qfunc [ 3.02 2.88 3.47 2.93 ]
  step 4 qfunc [ 3.48 3.23 3.30 3.27 ]
  step 5 qfunc [ 3.49 3.26 3.38 3.27 ]
 6: score 0.500 initialq 3.40 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.25 3.32 2.33 2.15 ]
  step 1 qfunc [ 2.37 3.38 2.38 2.29 ]
  step 2 qfunc [ 2.54 2.68 2.69 3.45 ]
  step 3 qfunc [ 3.00 3.09 3.46 3.20 ]
  step 4 qfunc [ 3.32 3.47 3.40 3.41 ]
  step 5 qfunc [ 3.49 3.46 3.48 2.71 ]
 7: score 0.682 initialq 3.40 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.25 3.34 2.40 2.17 ]
  step 1 qfunc [ 2.43 2.55 3.38 2.35 ]
  step 2 qfunc [ 2.61 3.40 2.50 2.59 ]
  step 3 qfunc [ 2.96 2.99 2.50 3.44 ]
  step 4 qfunc [ 3.44 3.18 3.45 3.40 ]
  step 5 qfunc [ 3.46 2.90 3.44 3.16 ]
 8: score 0.596 initialq 3.38 opt [[1 2 1 3 1 2]]
  step 0 qfunc [ 2.22 3.33 2.42 2.24 ]
  step 1 qfunc [ 2.48 2.87 3.36 2.47 ]
  step 2 qfunc [ 2.57 3.38 2.57 2.69 ]
  step 3 qfunc [ 2.80 3.15 2.53 3.43 ]
  step 4 qfunc [ 3.43 3.44 3.42 3.31 ]
  step 5 qfunc [ 3.44 3.44 3.46 3.26 ]
 9: score 0.500 initialq 3.38 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.26 3.29 2.35 2.35 ]
  step 1 qfunc [ 2.50 3.35 2.63 2.49 ]
  step 2 qfunc [ 2.68 2.69 2.87 3.42 ]
  step 3 qfunc [ 3.08 3.12 3.44 2.90 ]
  step 4 qfunc [ 3.45 3.37 3.20 3.38 ]
  step 5 qfunc [ 3.46 3.42 3.20 2.82 ]
10: score 1.000 initialq 3.38 opt [[2 1 1 3 3 2]]
  step 0 qfunc [ 2.25 2.50 3.32 2.23 ]
  step 1 qfunc [ 2.34 3.36 2.26 2.32 ]
  step 2 qfunc [ 2.58 3.38 2.54 2.62 ]
  step 3 qfunc [ 2.51 2.81 2.54 3.42 ]
  step 4 qfunc [ 3.25 3.21 3.19 3.44 ]
  step 5 qfunc [ 2.57 2.54 3.46 2.53 ]
11: score 0.500 initialq 3.38 opt [[1 1 3 2 0 0]]
  step 0 qfunc [ 2.23 3.29 2.37 2.22 ]
  step 1 qfunc [ 2.36 3.35 2.66 2.42 ]
  step 2 qfunc [ 2.72 2.62 2.69 3.41 ]
  step 3 qfunc [ 3.08 2.90 3.43 3.16 ]
  step 4 qfunc [ 3.45 3.39 3.38 3.39 ]
  step 5 qfunc [ 3.47 3.38 3.36 3.30 ]
12: score 0.760 initialq 3.36 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.20 3.30 2.32 2.16 ]
  step 1 qfunc [ 2.29 2.65 3.34 2.24 ]
  step 2 qfunc [ 2.44 3.36 2.59 2.50 ]
  step 3 qfunc [ 2.81 2.78 3.04 3.42 ]
  step 4 qfunc [ 3.36 3.34 3.43 2.88 ]
  step 5 qfunc [ 3.47 3.46 3.44 3.43 ]
13: score 0.971 initialq 3.36 opt [[1 2 1 3 3 2]]
  step 0 qfunc [ 2.26 3.29 2.39 2.30 ]
  step 1 qfunc [ 2.37 2.49 3.35 2.42 ]
  step 2 qfunc [ 2.45 3.37 2.50 2.55 ]
  step 3 qfunc [ 2.53 2.75 2.47 3.40 ]
  step 4 qfunc [ 3.31 3.21 2.15 3.42 ]
  step 5 qfunc [ 2.64 2.58 3.51 2.53 ]
14: score 0.521 initialq 3.36 opt [[1 2 1 3 0 0]]
  step 0 qfunc [ 2.22 3.28 2.46 2.26 ]
  step 1 qfunc [ 2.46 2.72 3.33 2.43 ]
  step 2 qfunc [ 2.60 3.36 2.62 2.70 ]
  step 3 qfunc [ 3.21 3.05 3.16 3.38 ]
  step 4 qfunc [ 3.39 3.20 3.35 3.35 ]
  step 5 qfunc [ 3.40 3.30 3.27 2.91 ]
15: score 0.500 initialq 3.35 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.24 3.27 2.36 2.23 ]
  step 1 qfunc [ 2.44 3.32 2.48 2.47 ]
  step 2 qfunc [ 2.67 2.74 2.57 3.40 ]
  step 3 qfunc [ 3.13 3.21 3.42 3.03 ]
  step 4 qfunc [ 3.35 3.43 3.31 3.25 ]
  step 5 qfunc [ 3.45 3.40 3.37 3.37 ]
16: score 0.521 initialq 3.35 opt [[1 2 1 3 0 2]]
  step 0 qfunc [ 2.17 3.26 2.38 2.13 ]
  step 1 qfunc [ 2.38 2.55 3.30 2.35 ]
  step 2 qfunc [ 2.48 3.32 2.49 2.41 ]
  step 3 qfunc [ 2.64 2.66 2.51 3.43 ]
  step 4 qfunc [ 3.44 3.21 3.17 3.18 ]
  step 5 qfunc [ 3.44 3.34 3.45 2.61 ]
17: score 0.521 initialq 3.35 opt [[1 1 3 2 1 1]]
  step 0 qfunc [ 2.20 3.29 2.43 2.27 ]
  step 1 qfunc [ 2.53 3.33 2.64 2.40 ]
  step 2 qfunc [ 2.66 2.60 2.64 3.40 ]
  step 3 qfunc [ 3.21 3.21 3.42 2.91 ]
  step 4 qfunc [ 3.42 3.43 3.39 3.03 ]
  step 5 qfunc [ 3.44 3.45 3.45 3.34 ]
18: score 0.500 initialq 3.33 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.27 3.26 2.39 2.22 ]
  step 1 qfunc [ 2.44 3.30 2.55 2.46 ]
  step 2 qfunc [ 2.68 2.72 2.74 3.35 ]
  step 3 qfunc [ 3.01 3.20 3.36 3.05 ]
  step 4 qfunc [ 3.31 3.38 3.34 3.35 ]
  step 5 qfunc [ 3.41 3.38 3.37 2.97 ]
19: score 0.740 initialq 3.32 opt [[1 2 1 3 2 0]]
  step 0 qfunc [ 2.14 3.26 2.44 2.13 ]
  step 1 qfunc [ 2.44 2.70 3.30 2.43 ]
  step 2 qfunc [ 2.54 3.32 2.50 2.60 ]
  step 3 qfunc [ 2.85 3.05 2.66 3.38 ]
  step 4 qfunc [ 3.37 3.31 3.40 3.35 ]
  step 5 qfunc [ 3.41 3.20 3.38 3.28 ]
20: score 0.750 initialq 3.32 opt [[1 1 3 2 2 0]]
  step 0 qfunc [ 2.21 3.26 2.38 2.25 ]
  step 1 qfunc [ 2.46 3.30 2.58 2.43 ]
  step 2 qfunc [ 2.65 2.50 2.58 3.35 ]
  step 3 qfunc [ 3.13 3.25 3.37 2.65 ]
  step 4 qfunc [ 3.36 3.35 3.38 3.17 ]
  step 5 qfunc [ 3.39 3.38 3.39 3.13 ]
21: score 0.500 initialq 3.32 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.24 3.26 2.32 2.19 ]
  step 1 qfunc [ 2.45 3.30 2.51 2.41 ]
  step 2 qfunc [ 2.69 2.73 2.50 3.35 ]
  step 3 qfunc [ 3.18 3.15 3.37 2.81 ]
  step 4 qfunc [ 3.37 3.38 3.32 3.18 ]
  step 5 qfunc [ 3.39 3.39 3.36 3.29 ]
22: score 0.750 initialq 3.32 opt [[1 1 3 2 2 0]]
  step 0 qfunc [ 2.26 3.24 2.43 2.21 ]
  step 1 qfunc [ 2.40 3.29 2.57 2.39 ]
  step 2 qfunc [ 2.68 2.59 2.73 3.34 ]
  step 3 qfunc [ 3.09 3.08 3.36 2.85 ]
  step 4 qfunc [ 3.34 3.36 3.37 3.33 ]
  step 5 qfunc [ 3.40 3.34 3.22 3.21 ]
23: score 0.750 initialq 3.31 opt [[2 1 1 3 2 0]]
  step 0 qfunc [ 2.32 2.56 3.22 2.23 ]
  step 1 qfunc [ 2.34 3.27 2.22 2.27 ]
  step 2 qfunc [ 2.55 3.29 2.56 2.62 ]
  step 3 qfunc [ 2.97 2.71 3.07 3.39 ]
  step 4 qfunc [ 3.23 3.16 3.40 2.99 ]
  step 5 qfunc [ 3.44 3.31 3.36 2.67 ]
24: score 0.500 initialq 3.31 opt [[1 1 3 2 1 0]]
  step 0 qfunc [ 2.28 3.26 2.40 2.25 ]
  step 1 qfunc [ 2.41 3.29 2.54 2.41 ]
  step 2 qfunc [ 2.63 2.59 2.50 3.37 ]
  step 3 qfunc [ 3.33 3.01 3.39 2.75 ]
  step 4 qfunc [ 3.40 3.40 3.37 3.17 ]
  step 5 qfunc [ 3.43 3.42 3.41 3.30 ]
25: score 0.768 initialq 3.27 opt [[1 1 1 1 3 2]]
  step 0 qfunc [ 2.23 3.19 2.28 2.22 ]
  step 1 qfunc [ 2.28 3.28 2.33 2.25 ]
  step 2 qfunc [ 2.37 3.35 2.35 2.62 ]
  step 3 qfunc [ 2.32 3.40 2.37 3.32 ]
  step 4 qfunc [ 2.25 2.24 2.40 3.48 ]
  step 5 qfunc [ 2.62 2.55 3.48 2.57 ]
26: score 1.000 initialq 3.27 opt [[1 1 2 3 3 2]]
  step 0 qfunc [ 2.31 3.18 2.37 2.27 ]
  step 1 qfunc [ 2.39 3.25 2.50 2.41 ]
  step 2 qfunc [ 2.56 2.54 3.36 2.73 ]
  step 3 qfunc [ 2.39 2.92 2.39 3.37 ]
  step 4 qfunc [ 3.00 2.85 2.94 3.38 ]
  step 5 qfunc [ 2.74 2.60 3.52 2.57 ]
27: score 1.000 initialq 3.27 opt [[1 1 3 2 3 2]]
  step 0 qfunc [ 2.22 3.18 2.36 2.14 ]
  step 1 qfunc [ 2.43 3.23 2.48 2.37 ]
  step 2 qfunc [ 2.44 2.52 2.61 3.35 ]
  step 3 qfunc [ 3.14 3.08 3.36 2.90 ]
  step 4 qfunc [ 3.38 3.31 3.34 3.38 ]
  step 5 qfunc [ 2.71 2.53 3.39 2.53 ]
28: score 0.557 initialq 3.26 opt [[1 2 1 3 1 2]]
  step 0 qfunc [ 2.18 3.19 2.44 2.25 ]
  step 1 qfunc [ 2.46 2.73 3.24 2.41 ]
  step 2 qfunc [ 2.51 3.25 2.56 2.58 ]
  step 3 qfunc [ 2.60 3.01 2.59 3.36 ]
  step 4 qfunc [ 3.21 3.38 3.14 3.17 ]
  step 5 qfunc [ 3.37 3.36 3.41 3.00 ]
29: score 0.742 initialq 3.18 opt [[1 3 1 3 2 0]]
  step 0 qfunc [ 2.25 3.17 2.46 2.27 ]
  step 1 qfunc [ 2.47 2.66 2.61 3.27 ]
  step 2 qfunc [ 2.37 3.29 2.50 2.35 ]
  step 3 qfunc [ 2.42 2.62 2.58 3.39 ]
  step 4 qfunc [ 2.97 2.76 3.42 2.72 ]
  step 5 qfunc [ 3.44 3.18 2.82 3.05 ]

In [ ]: