In [1]:
%matplotlib inline
In [2]:
import gym
import tensorflow as tf
import numpy as np
import math
from tqdm import tqdm, trange
import random
import time
In [3]:
from experiencereplay import ExperienceReplay, PrioritizedExperienceReplay, ModelBasedPrioritizedExperienceReplay
from experiment import Experiment
from dmlac import DMLAC
import nn
from exploration import EpsilonGreedyStrategy, OUStrategy
In [4]:
# Hyperparameters and run configuration for one DMLAC experiment.
# Everything downstream (networks, agent, replay buffer, training loop)
# reads its options from this dictionary.
settings = {
    # Environment and schedule
    "environment": 'Reacher-v1',
    "timesteps": 8000,
    "batch_size": 64,
    "learning_start": 256,
    "forward_steps": 1,
    "discount_factor": 0.9,
    "trace_decay": 0.5,
    "max_replay_buffer_size": 100000,
    # Learning rates, one per network
    "actor_learning_rate": 0.0001,
    "model_learning_rate": 0.001,
    "reward_learning_rate": 0.001,
    "value_learning_rate": 0.001,
    # L2 coefficients handed to the agent (None presumably disables them -- confirm in dmlac.py)
    "actor_l2": None,
    "model_l2": None,
    "reward_l2": None,
    "value_l2": None,
    # Target-network tracking rates
    "actor_target_approach_rate": 0.99,
    "value_target_approach_rate": 0.99,
    # Work done per environment step once learning has started
    "train_updates_per_step": 10,
    "priority_updates_per_step": 100,
    # Network architectures
    "actor_net_layers": [256, 128],
    "actor_net_activation_fn": tf.nn.tanh,
    "actor_bounded_output": True,
    "value_net_layers": [256, 128],
    "value_net_activation_fn": tf.nn.elu,
    "model_net_embedding": 128,
    "model_net_layers": [128],
    "model_net_activation_fn": tf.nn.elu,
    "reward_net_embedding": 128,
    "reward_net_layers": [128],
    "reward_net_activation_fn": tf.nn.elu,
    # Seeds
    "environment_seed": 0,
    "noise_seed": 0,
    # Hardware and rendering
    "gpu_memory_fraction": 0.1,
    "render_start": 5500,
    "render_environment": True,
    "render_frequency": 10,
}

# Each run gets its own directory, keyed by environment name and start time.
run_stamp = int(time.time())
settings["experiment_path"] = "experiments/experiment_dmlac_{}_{}".format(settings["environment"], run_stamp)

# One Adam optimizer per network, each using the matching learning rate above.
for optimizer_key, rate_key in (
        ("actor_tf_optimizer", "actor_learning_rate"),
        ("model_tf_optimizer", "model_learning_rate"),
        ("reward_tf_optimizer", "reward_learning_rate"),
        ("value_tf_optimizer", "value_learning_rate"),
):
    settings[optimizer_key] = tf.train.AdamOptimizer(settings[rate_key])

print(settings["experiment_path"])
experiments/experiment_dmlac_Reacher-v1_1495418098
In [5]:
def preprocess_state(observation):
    """Convert an environment observation into the state array fed to the agent.

    The observation is copied into a NumPy array. For the
    "MountainCarContinuous-v0" environment the element at index 1 is scaled
    by 10; for every other environment the array is returned as is.

    Reads the environment name from the module-level ``settings`` dict.
    """
    state = np.array(observation)
    if settings["environment"] != "MountainCarContinuous-v0":
        return state
    # presumably index 1 is the (small) velocity component -- confirm against the env
    state[1] *= 10
    return state
def preprocess_reward(reward):
    """Identity hook: return the environment reward unchanged.

    Exists so reward shaping can be added in one place without touching the
    training loop.
    """
    return reward
In [6]:
# Build the configured environment, seed it, and use the first (preprocessed)
# observation as the starting state of the training loop.
env = gym.make(settings["environment"])
env.seed(settings["environment_seed"])
state = observation = preprocess_state(env.reset())
[2017-05-22 03:54:58,198] Making new env: Reacher-v1
In [7]:
# Sizes of the observation and action vectors; used to shape every network below.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

# Sanity printout: dimensions, space bounds, and the initial state.
for shown in (
        state_dim,
        action_dim,
        env.observation_space.high,
        env.observation_space.low,
        env.action_space.high,
        env.action_space.low,
        str(state),
):
    print(shown)
11
2
[ inf inf inf inf inf inf inf inf inf inf inf]
[-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
[ 1. 1.]
[-1. -1.]
[ 0.99603073 0.99567135 -0.08901003 0.09294385 -0.1590063 0.06923054
-0.00107423 0.00169846 0.36860851 -0.07769702 0. ]
In [8]:
# Cap this process's share of GPU memory, then open the interactive session
# and the summary writer that logs under the experiment directory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=settings["gpu_memory_fraction"])
session_config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.InteractiveSession(config=session_config)

log_dir = settings["experiment_path"] + "/logdir"
summary_writer = tf.summary.FileWriter(log_dir)
In [9]:
# Build the four networks used by the agent.
# NOTE(review): the trailing boolean passed to each constructor presumably
# toggles batch normalisation (the actor printout lists bn_* nodes) -- confirm in nn.py.

# Actor: state -> action, optionally bounded to the environment's action space.
action_bounds = env.action_space if settings["actor_bounded_output"] else None
actor_network = nn.fully_connected(
    "Actor", session, [state_dim], settings["actor_net_layers"], action_dim,
    settings["actor_net_activation_fn"], action_bounds, True)

# Model: (state, action) -> output of size state_dim.
model_network = nn.fully_connected_with_input_embedding(
    "Model", session, [state_dim, action_dim], settings["model_net_embedding"],
    settings["model_net_layers"], state_dim, settings["model_net_activation_fn"],
    None, False)

# Reward: (state, action) -> scalar.
reward_network = nn.fully_connected_with_input_embedding(
    "Reward", session, [state_dim, action_dim], settings["reward_net_embedding"],
    settings["reward_net_layers"], 1, settings["reward_net_activation_fn"],
    None, False)

# Value: state -> scalar.
value_network = nn.fully_connected(
    "Value", session, [state_dim], settings["value_net_layers"], 1,
    settings["value_net_activation_fn"], None, False)

print(str(actor_network))
[] --> Actor_input_0
['Actor_input_0'] --> bn_hidden_0
['bn_hidden_0'] --> hidden_0_256
['hidden_0_256'] --> bn_hidden_1
['bn_hidden_1'] --> hidden_1_128
['hidden_1_128'] --> bn_output
['bn_output'] --> output
['output'] --> bounding
In [10]:
# Assemble the DMLAC agent from the four networks. Every remaining option is
# forwarded from `settings` under the keyword of the same name.
agent_option_names = (
    "forward_steps",
    "discount_factor",
    "trace_decay",
    "actor_tf_optimizer",
    "model_tf_optimizer",
    "reward_tf_optimizer",
    "value_tf_optimizer",
    "actor_l2",
    "model_l2",
    "reward_l2",
    "value_l2",
    "actor_target_approach_rate",
    "value_target_approach_rate",
)
agent_options = {option: settings[option] for option in agent_option_names}
agent = DMLAC(actor_network, model_network, reward_network, value_network,
              summary_writer=summary_writer, **agent_options)
In [11]:
# Create a checkpoint saver for the graph built so far.
# NOTE(review): `saver` is not used again in the visible cells -- confirm it is needed.
saver = tf.train.Saver()
# Record the session graph with the summary writer (logged under <experiment_path>/logdir).
summary_writer.add_graph(session.graph)
In [12]:
# Replay buffer bound to the agent and environment, capped at max_replay_buffer_size.
# NOTE(review): the meaning of the trailing `False` flag is not visible here -- see experiencereplay.py.
experience_replay = ModelBasedPrioritizedExperienceReplay(agent, env, settings["max_replay_buffer_size"], False)
In [13]:
# Exploration policy queried once per step in the training loop; seeded with noise_seed.
exploration_strategy = EpsilonGreedyStrategy(agent, env, settings["noise_seed"])
In [14]:
# Main training loop: act in the environment, store each transition in the
# replay buffer, train the agent once `learning_start` steps have passed, and
# record per-step statistics in `exp`.
exp = Experiment(settings["experiment_path"], session, env, settings,
                 settings["render_environment"], settings["render_frequency"],
                 settings["render_start"])

total_steps = settings["timesteps"]
learning_start = settings["learning_start"]
status_template = '[{}][{}] reward: {:.2f}, reward 100MA: {:.2f}, Exploration: {:.2f}, action: {}, td-error: {:.4f}, model error: {:.4f}, reward error: {:.4f}, ep_reward: {}, ep_dur: {}'

progress_bar = tqdm(total=total_steps)
e_t = 0  # steps taken in the current episode
try:
    # `range` instead of `xrange` so the cell runs on Python 2 and Python 3.
    for t in range(total_steps):
        e_t += 1

        # Exploration decays as ((T - t) / T) ** 4; it is pinned to 1 during
        # the warm-up phase before learning starts.
        exploration = ((total_steps - t) / float(total_steps)) ** 4
        if t < learning_start:
            exploration = 1

        action = exploration_strategy.action(state, exploration)
        observation, reward, done, info = env.step(action)
        next_state = np.reshape(preprocess_state(observation), (state_dim,))

        # New transitions are added with infinite priority.
        experience_replay.add_experience(state, action, preprocess_reward(reward),
                                         next_state, done, float("inf"))
        td_error = math.fabs(experience_replay.get_last_td_error())
        model_error = experience_replay.get_last_model_error()
        reward_error = experience_replay.get_last_reward_error()
        exp.record(t, state, action, reward, next_state, done,
                   td_error, model_error, reward_error)

        state = next_state
        if done:
            # NOTE(review): in the recorded run the episode counter equals t + 1
            # at step 5509, i.e. `done` appears never to have been true --
            # confirm the environment's time limit is applied.
            observation = env.reset()
            # Reshape like `next_state` so every stored state has shape (state_dim,).
            state = np.reshape(preprocess_state(observation), (state_dim,))
            e_t = 0

        if t >= learning_start:
            experience_replay.train_agent(settings["batch_size"],
                                          settings["train_updates_per_step"])
            # NOTE(review): the source indentation was lost; this call is assumed
            # to belong to the learning branch -- confirm.
            experience_replay.update_oldest_priorities(settings["priority_updates_per_step"])

        progress_bar.set_description(status_template.format(
            t, e_t, reward, exp.reward_100ma.get_average(), exploration,
            str(action), td_error, model_error, reward_error,
            exp.last_episode_reward, exp.last_episode_duration))
        progress_bar.update()
finally:
    # Close the bar even when the run is interrupted or a step raises.
    progress_bar.close()
[5509][5510] reward: -0.01, reward 100MA: -0.02, Exploration: 0.01, action: [ 0.04076123 -0.01198206], td-error: 0.0057, model error: 0.0057, reward error: 0.0000, ep_reward: 0, ep_dur: 0: 69%|██████▉ | 5510/8000 [15:51<06:42, 6.19it/s] 0:00<1:05:39, 2.03it/s][2017-05-22 04:10:52,890] GLFW error: 65544, desc: X11: RandR gamma ramp support seems broken
[7999][8000] reward: -0.01, reward 100MA: -0.00, Exploration: 0.00, action: [ 0.02528054 0.01495615], td-error: 0.0345, model error: 0.0035, reward error: 0.0000, ep_reward: 0, ep_dur: 0: 100%|██████████| 8000/8000 [23:32<00:00, 5.21it/s]
In [15]:
# Persist the recorded experiment, then report where it was written.
exp.save()
print("Experiment results saved in " + exp.path)
Experiment results saved in experiments/experiment_dmlac_Reacher-v1_1495418098
In [16]:
# Cumulative reward over the run; the trailing ';' hides the Line2D repr.
exp.plot_cumulative_reward();
Out[16]:
[<matplotlib.lines.Line2D at 0x7f8bee4c3250>]
In [17]:
# Per-step reward; the trailing ';' hides the Line2D repr.
exp.plot_reward();
Out[17]:
[<matplotlib.lines.Line2D at 0x7f8b8bd807d0>]
In [18]:
# TD error recorded at each step; the trailing ';' hides the Line2D repr.
exp.plot_td_error();
Out[18]:
[<matplotlib.lines.Line2D at 0x7f8b8bc62190>]
In [19]:
# Model error; skip_steps is set just past the warm-up phase (learning_start + 10),
# presumably to leave the untrained-model steps out of the plot -- confirm in experiment.py.
# The trailing ';' hides the Line2D repr.
exp.plot_model_error(skip_steps=settings["learning_start"]+10);
Out[19]:
[<matplotlib.lines.Line2D at 0x7f8b8bb9d410>]
In [20]:
# Reward-model error; skip_steps is set just past the warm-up phase (learning_start + 10),
# presumably to leave the untrained-model steps out of the plot -- confirm in experiment.py.
# The trailing ';' hides the Line2D repr.
exp.plot_reward_error(skip_steps=settings["learning_start"]+10);
Out[20]:
[<matplotlib.lines.Line2D at 0x7f8b8bad5f10>]
In [21]:
# Reward per episode; the trailing ';' hides the Line2D repr.
exp.plot_episode_reward();
Out[21]:
[<matplotlib.lines.Line2D at 0x7f8b8ba2a550>]
In [22]:
# Episode length; the trailing ';' hides the Line2D repr.
exp.plot_episode_duration();
Out[22]:
[<matplotlib.lines.Line2D at 0x7f8b8b967250>]
In [23]:
# Frames exist only when rendering was switched on for this run, so the
# animation is shown only in that case.
if settings["render_environment"]:
    exp.display_frames_as_gif()
In [24]:
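# session.close()  # kept disabled: the next cell still reads variables via `exp`, which was given this session; close it once done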
In [25]:
# Dump every TensorFlow variable (name and value) for inspection.
# NOTE(review): this prints complete weight arrays and produces a very large
# output; consider clearing the output or printing a summary before sharing.
exp.print_all_tf_variables()
(u'Actor_hidden_0_256_W:0', array([[ 0.02184531, 0.6584869 , -0.35250694, ..., 0.02292078,
-0.14805187, 0.33367008],
[-0.45958108, -0.58810782, 0.8525458 , ..., -0.40822548,
-0.66705292, 0.19058971],
[-0.23675433, -0.80031902, 0.55785418, ..., 0.11207366,
0.11094981, 0.11024426],
...,
[-0.19149378, 0.35838678, -0.18620855, ..., 0.42209318,
-0.25108942, -0.07744219],
[ 0.30462518, 0.03072642, -0.31604305, ..., -0.02419103,
0.07561729, -0.93943244],
[-0.30388325, -0.09148772, -0.19608556, ..., 0.06734596,
0.27259329, -0.03550243]], dtype=float32))
(u'Actor_hidden_0_256_b:0', array([ 0.05225403, -1.96510601, -0.39344555, 0.39989161, -0.60444248,
-0.21370804, 0.33721957, 1.86844873, 0.85644197, -0.08228492,
0.77838361, 0.85716838, 0.04178184, -0.82109809, -0.40532449,
-0.11042746, -0.04064947, -0.37955201, -0.56409848, 1.3267051 ,
0.39185208, -0.26386055, 0.69298327, 0.36525115, 0.95740378,
0.59233439, -0.44344583, 1.20619917, -0.18045934, 0.4463723 ,
0.51054376, -0.26756474, -1.70417452, 0.12816213, 0.17174456,
-2.1803112 , 2.60706544, -0.39383215, -0.17621474, 0.61404151,
0.15888856, 0.74433446, 0.35146451, -0.77977014, 0.12555641,
-0.08325343, 0.16330998, -0.55008602, -1.22172928, -0.27347562,
1.05046058, -1.00203371, -0.65504938, -1.41270769, -0.14391871,
0.23237696, -0.25703794, 0.13728432, 1.03222656, -2.38283515,
1.90441585, 0.23441359, 1.63706362, -0.43994427, 0.08788566,
0.13450548, -1.01185167, -1.46674097, -2.17179561, 0.57918632,
-0.95134592, -1.15703106, -0.56530029, 2.14635134, 1.73849249,
-0.55352741, -0.68283421, -0.25363547, -0.6153003 , -2.2627039 ,
1.00852334, -0.83316982, -0.98977804, -1.11391377, -0.13345794,
1.86573064, 2.29964137, 1.48154366, 0.69074017, -0.76656359,
-1.28449285, -0.08142639, -1.19956243, -1.07823908, -1.57053673,
-1.38356829, -0.61908078, -1.00438833, -0.61729103, -0.81412369,
-0.72979826, 0.84975517, 0.93105364, -1.95817149, 0.59972721,
0.08624212, -0.39000517, 0.00438568, 0.34928519, 1.46953392,
0.24961494, -0.26086462, -2.57239389, 0.11736579, -0.27321225,
-0.47523114, -0.48484421, 0.63591361, 0.70478761, -0.53782272,
2.07056308, -0.80191493, -0.19013122, -0.22986045, 1.00373805,
1.99783337, -0.12021038, -0.07548336, 0.48247212, -0.12027507,
0.69388545, -0.91788816, -0.20581985, 0.91245365, -0.01377018,
-0.94014066, -0.34926328, 0.87034631, 0.94986123, -1.62572551,
-0.12956639, -0.01453394, -0.27277133, 1.28731084, 0.30077958,
0.86603314, 0.73608553, 0.31851041, -0.83856463, -1.12741506,
1.90753365, 0.16568269, -0.55544537, -0.07880834, 1.98567498,
-0.18809076, -0.15265439, -0.24641317, 0.13712762, 0.33855417,
-0.13675816, -0.61920393, -0.00759957, -0.05754153, -0.16445331,
-0.51067573, 0.79984319, 0.6358192 , 0.50204718, -0.14190881,
0.1633777 , 0.3817713 , 1.46152449, 0.63780951, 0.21187314,
-0.36858457, 0.51437694, -0.21987864, 2.36728501, 0.05327367,
-0.30428988, 0.48424911, -0.81209797, 1.32557929, 0.71793115,
0.47755986, 1.13160026, -0.10356622, 0.72104287, 0.14025733,
0.91478062, -1.71608341, -1.25190961, 0.72086811, 1.36104321,
-0.07374587, 0.07483088, -0.96683294, 0.22220197, 1.68877208,
-1.2642554 , -0.50611758, 0.14841715, -0.40712857, 1.22508574,
0.83138758, -1.36194897, 1.7648766 , 0.61511111, -0.34344384,
-1.6025517 , -0.851964 , -0.12032167, 0.57242858, -0.01648462,
-0.49457061, 0.20222802, -0.11231782, 2.00007319, -0.90879184,
-0.05790665, -2.57925868, 0.17295827, -0.35730311, 1.18308687,
0.45421374, 1.48016322, 0.18892218, -0.70938724, -1.69909942,
1.41006863, -1.11134815, -0.91127741, 1.37618935, 1.0005306 ,
-0.52864242, 0.36429957, 0.15785539, -0.4574739 , 3.09568143,
-0.41405767, 0.78312856, 0.98548985, 2.31100845, -0.07251009,
0.6178363 , -1.35155249, -0.67172533, -1.62578607, -0.13272475,
-0.36773044, 0.66079283, 0.02769758, 1.6737268 , -0.23536892,
1.60555506], dtype=float32))
(u'Actor_hidden_1_128_W:0', array([[ 0.04512298, -0.0301334 , -0.03347785, ..., -0.04040068,
0.00957241, 0.00434237],
[ 0.1200965 , 0.04163007, 0.19612187, ..., -0.0974352 ,
-0.13847293, -0.06888827],
[ 0.01145176, 0.06999621, -0.04501639, ..., 0.08291807,
0.08749606, 0.00280177],
...,
[-0.13734856, 0.03828296, 0.01030623, ..., -0.0279848 ,
-0.12181536, 0.09505183],
[ 0.00115642, -0.16149566, 0.06869428, ..., -0.18240266,
-0.06087336, 0.02762599],
[ 0.03581493, -0.0762062 , 0.01522159, ..., 0.20930295,
0.03155531, -0.00304763]], dtype=float32))
(u'Actor_hidden_1_128_b:0', array([-1.23296082, -0.34427199, -0.2468074 , 0.60844833, 0.58441478,
-0.41401914, -0.64251876, 0.82717884, -0.45151368, -2.35220671,
-0.03866522, 0.57354462, 1.77337539, 0.42734417, -0.24771035,
0.8316198 , -0.64430571, -0.7161749 , -0.93797976, -0.10598571,
-1.59137702, 1.53653598, -2.07764482, 0.89958143, 0.06102429,
0.58534038, 0.49580121, -0.17694032, -0.08095107, 1.94820559,
-0.4489969 , 0.2034803 , -0.07092656, 0.37070817, -1.09910285,
-0.40829268, -0.90832299, 0.39625376, 0.98676598, 0.14105171,
-0.64038569, -2.40160108, -0.98015416, -1.96743977, -0.85040861,
-0.27322784, 0.49255946, 0.81847465, -1.27949631, 1.17827201,
-0.58234698, 1.67010963, 0.95559764, 1.16383231, -0.12983152,
1.69960821, -0.48625013, -0.62397712, 0.3048656 , 0.29698783,
-0.49946254, -1.31595039, -1.52349162, -0.69173628, 0.82643092,
-0.29943779, -0.8906737 , -0.07811376, 0.80101609, -1.48692834,
-0.62490374, -0.35745561, 1.55897796, -0.24606775, 0.08967256,
-0.40942964, -0.81606817, 0.83049494, 0.36407429, -1.89511502,
-0.29348192, -0.54826319, -1.22048724, 0.54243463, -0.44367176,
0.82173651, 1.42351151, -0.74305719, -0.35469687, 0.42976427,
-0.91641051, -1.269557 , 2.01335835, -0.50602454, -1.72874761,
-0.81234759, -1.03061187, -0.44556883, -0.01062292, -0.33235767,
0.51959598, 0.99534804, 0.28475454, -0.03142432, -0.53567624,
0.05999714, 0.66610235, 0.3381848 , 0.92805952, -1.32177067,
0.53064811, 0.23499911, -0.49201044, 1.32452297, 0.51955897,
0.46245256, -1.13587511, 1.14528084, -0.95527709, 0.02794553,
-0.51366031, -0.33342484, -1.0516398 , 1.67574453, -0.7846027 ,
-0.21748793, 2.12195754, 1.37947524], dtype=float32))
(u'Actor_output_W:0', array([[ -8.76691891e-04, -3.08698677e-02],
[ -1.19005851e-01, 3.71613353e-03],
[ 9.65019912e-02, 1.14460409e-01],
[ 1.22101940e-02, 3.48487757e-02],
[ -6.32809997e-02, -2.43655257e-02],
[ 4.03704345e-02, -7.84816295e-02],
[ -2.81405076e-02, -1.81830302e-01],
[ -2.96990760e-02, -1.51409447e-01],
[ -1.14105800e-02, 4.10314389e-02],
[ 1.09948829e-01, -8.79727677e-02],
[ 8.74995366e-02, -4.93757017e-02],
[ 3.22851688e-02, 4.92592901e-02],
[ 2.52036937e-02, 1.26435771e-03],
[ -2.71705668e-02, -8.20640251e-02],
[ -4.80206124e-02, 7.84454197e-02],
[ 6.62243366e-02, 7.20252767e-02],
[ -2.11139694e-02, -2.84207482e-02],
[ 5.99373085e-03, -3.55937481e-02],
[ -2.70109400e-02, 2.61803400e-02],
[ -5.77936769e-02, 6.78113997e-02],
[ 5.42171746e-02, 7.97184855e-02],
[ 7.24046454e-02, 1.02115218e-02],
[ -8.41056332e-02, 4.81463410e-02],
[ -1.25732705e-01, -1.11196190e-02],
[ -1.10357806e-01, 1.00296266e-01],
[ -1.70931667e-02, -1.14939101e-01],
[ 4.39495146e-02, -4.12300825e-02],
[ 6.81385100e-02, -1.55016063e-02],
[ 3.22596543e-02, -2.90127173e-02],
[ 6.01546727e-02, 8.25749561e-02],
[ 2.77086012e-02, 5.51092252e-02],
[ 8.12990814e-02, -1.49443120e-01],
[ -7.97680765e-02, 1.29366621e-01],
[ -2.53591835e-02, 5.69873340e-02],
[ -6.36534169e-02, -3.37472782e-02],
[ 1.80550385e-02, 5.64337224e-02],
[ -3.81055363e-02, -2.45531425e-02],
[ 7.94692859e-02, 1.94207933e-02],
[ 5.95506979e-03, 5.01618236e-02],
[ -1.00117065e-01, -2.29053237e-02],
[ -9.74449888e-02, 1.47751004e-01],
[ -1.12234630e-01, 1.02525942e-01],
[ -7.14013129e-02, -2.18527000e-02],
[ 1.57833427e-01, 5.81730269e-02],
[ 4.34051976e-02, 1.35150226e-02],
[ 1.93440119e-05, 6.94071787e-06],
[ -6.69320151e-02, 5.13925543e-03],
[ -6.20150156e-02, -4.13779728e-03],
[ -2.27307454e-02, 7.89260566e-02],
[ 1.48123443e-01, 8.43607634e-02],
[ 3.37842666e-02, 5.48587479e-02],
[ -4.10583802e-02, -7.79675543e-02],
[ -1.20717958e-01, -5.80552146e-02],
[ 7.47567117e-02, 3.70735638e-02],
[ 6.06835168e-03, -2.82336306e-02],
[ -2.91416049e-02, 4.23480831e-02],
[ -7.33266696e-02, 1.27943709e-01],
[ 3.94418389e-02, -6.22650459e-02],
[ -9.95908901e-02, -7.85603933e-03],
[ -1.36575373e-02, 8.68254378e-02],
[ 1.71206724e-02, -6.93133101e-02],
[ 1.73562035e-01, 5.55955768e-02],
[ 7.72708803e-02, 1.33735880e-01],
[ -3.16548795e-02, -1.56175494e-01],
[ -1.11969031e-01, -1.56581476e-01],
[ -2.92116906e-02, -1.79167967e-02],
[ 1.36776760e-01, 5.10283262e-02],
[ -4.73783985e-02, -6.05664309e-03],
[ 1.76678021e-02, 9.47924852e-02],
[ 7.39883557e-02, -1.04817757e-02],
[ -7.99243674e-02, -4.77083214e-02],
[ -3.49384956e-02, 1.01614706e-01],
[ -8.21166486e-02, -1.58411041e-02],
[ 1.09422497e-01, -3.01169343e-02],
[ 3.71219367e-02, 3.17488168e-03],
[ -3.87463421e-02, 1.27270287e-02],
[ 7.56556029e-03, -3.95741500e-02],
[ 4.89587039e-02, -1.28118351e-01],
[ 8.50566179e-02, 8.79083350e-02],
[ -6.12992272e-02, 1.27185779e-02],
[ -7.05215111e-02, 1.11424718e-02],
[ -3.49384770e-02, -1.19260870e-01],
[ -4.56628464e-02, 4.02905680e-02],
[ -2.10124142e-02, -6.23733215e-02],
[ 6.59426376e-02, 1.96607523e-02],
[ -3.80337425e-02, -1.98936835e-02],
[ -2.79653966e-02, 1.83896020e-01],
[ -1.39306113e-02, 1.65303722e-01],
[ -1.41077396e-03, -6.93012625e-02],
[ 2.02834740e-01, 1.60479676e-02],
[ -9.34433937e-02, -6.57190382e-02],
[ -1.33234724e-01, -5.37957773e-02],
[ -7.11210370e-02, 8.72007385e-02],
[ -6.64450526e-02, 2.50344962e-01],
[ -7.98843503e-02, -2.43439414e-02],
[ -3.11871246e-02, -9.32564586e-03],
[ -2.09718551e-02, -4.09383513e-02],
[ -6.10097777e-03, 4.72242087e-02],
[ -2.15725303e-02, -1.72038041e-02],
[ 7.51805976e-02, -4.70133824e-03],
[ 2.36786846e-02, -5.79449385e-02],
[ 5.14581762e-02, 6.10406175e-02],
[ -3.42623256e-02, -1.79163888e-02],
[ 5.33013567e-02, 1.05160646e-01],
[ 6.39378652e-02, 2.39389366e-03],
[ -1.11036085e-01, -1.13599464e-01],
[ -8.25501829e-02, 2.45169103e-02],
[ 1.21581592e-01, -6.09539449e-02],
[ -5.02556302e-02, 9.27153453e-02],
[ 1.07000135e-01, -4.96076643e-02],
[ -9.00843069e-02, 1.42152831e-01],
[ -9.51232463e-02, -9.37574729e-02],
[ -2.05957163e-02, -1.88999847e-01],
[ 4.00200486e-02, -3.91558520e-02],
[ -7.65001029e-02, 9.43929236e-03],
[ 4.55016689e-03, 6.60842508e-02],
[ -3.64515297e-02, -6.86637163e-02],
[ -1.90450519e-01, 1.40214264e-01],
[ 6.27715364e-02, -4.37777862e-02],
[ 1.61669590e-02, -2.10741982e-02],
[ 1.21590182e-01, -1.22214705e-02],
[ -6.95824670e-03, -3.38192992e-02],
[ 2.05808617e-02, 6.91423193e-02],
[ -1.01207923e-02, 1.19514436e-01],
[ 1.32273696e-02, -1.39843583e-01],
[ -8.18182677e-02, -3.05667836e-02],
[ -7.00198635e-02, -8.97349417e-03],
[ 1.22950040e-02, 5.93732260e-02]], dtype=float32))
(u'Actor_output_b:0', array([ 0.73234677, -0.03757242], dtype=float32))
(u'Model_encoding_0_128_W:0', array([[ 0.17917201, -1.01395178, 0.11591133, ..., 0.36964464,
1.13481104, -0.40418565],
[-1.35688031, 0.70264524, 0.39999881, ..., -1.0983274 ,
0.29786995, 1.04856646],
[ 0.1037867 , -0.49320912, -2.94712305, ..., -0.34723401,
-0.41496432, -0.80160862],
...,
[-1.18690133, 1.06154573, -0.07643984, ..., -0.82608032,
-0.97225469, -0.03411665],
[ 0.38336131, -0.51195568, 0.38449305, ..., 0.19555998,
0.61809188, 0.62508368],
[ 0.21392091, 0.10262807, 0.03195646, ..., 0.19250952,
-0.17280522, 0.19500913]], dtype=float32))
(u'Model_encoding_0_128_b:0', array([-0.27720106, 0.47427785, -0.61456329, 1.48531544, -2.19018173,
-0.60296166, 0.83316839, -0.97948724, -2.45538378, -0.70606154,
-2.371099 , 0.6262778 , -0.51011902, -0.34905496, 0.11999631,
0.5344103 , -0.49274182, -0.59691173, 0.50403774, -1.10684884,
-1.38140142, 1.59632742, 0.45207211, -0.21177338, -2.44820213,
-0.73800075, -1.03797531, 0.24841875, -0.04607596, -1.01397026,
0.24236375, -0.82417631, -1.43615413, -0.74585384, -0.02341394,
0.7575407 , 1.09740806, -0.52072644, -1.57396102, 2.08571172,
-2.14406371, -0.88774961, 0.99817699, 0.27946413, -1.42191148,
-2.28688121, -0.36472556, -1.08155227, -1.80528069, -0.19403973,
0.14698981, 0.4148356 , 0.76445168, -1.47763276, -0.89340729,
-0.97026026, -0.98450547, 1.15744007, 0.87108374, -0.19550499,
-0.59098965, 0.37208581, -0.47082695, -0.91043174, -0.33944526,
-1.88382125, 0.24453166, -2.39113235, 1.14897501, -1.13925922,
-0.57109654, 0.42758453, -1.63271439, 0.52567059, -0.77961498,
-1.70080268, -0.50483173, 0.93795699, -1.59869111, 0.52247036,
1.42057443, 0.82892084, 0.67714059, 0.3226141 , 0.06430887,
0.12795095, 0.95457542, -0.1648643 , 0.96159226, -0.22293271,
-0.37598583, -2.77556372, -0.11927647, -0.50973016, 0.18017715,
-2.12156773, 0.04363552, -1.20593774, -0.11801288, -3.08670878,
0.3594752 , -1.29593718, 0.76409614, -0.528126 , 0.99165452,
0.51176012, 0.16627979, -0.99536222, 0.25176066, -0.31371114,
-0.94970202, -2.30666018, -0.48273778, -1.60639226, -1.32118022,
-0.06895649, -1.44842863, 0.3407591 , -2.20715547, 0.18327889,
0.25159451, -0.93463004, -0.87844688, -1.3784163 , 0.82184649,
-0.11345341, -0.69852352, -0.92332828], dtype=float32))
(u'Model_encoding_1_128_W:0', array([[ -5.27509272e-01, -2.29309034e-03, 7.90692687e-01,
2.70322748e-02, 2.12218118e+00, -7.89202988e-01,
5.60066837e-04, 5.22503197e-01, 2.16473192e-01,
-3.99630874e-01, 5.37247490e-03, -3.09146754e-03,
-1.36146724e+00, -7.16002345e-01, 7.63398362e-03,
4.31241840e-02, -5.15646022e-03, -6.40223801e-01,
-7.65850961e-01, -1.83402419e+00, 3.63812864e-01,
-5.88139355e-01, 3.89956951e-01, 2.81089759e+00,
8.51206243e-01, 1.71590236e-03, -5.50253242e-02,
-5.20186722e-01, 1.44880712e+00, 8.24728370e-01,
9.70921397e-01, -1.34269214e+00, -2.68700480e-01,
3.25486809e-01, -9.40243006e-02, -2.88748043e-03,
-1.01020467e+00, -2.74793983e-01, 2.05255553e-01,
4.98225361e-01, 1.08538842e+00, -9.83649373e-01,
3.11516166e-01, 5.27088761e-01, 5.99770427e-01,
-9.59302578e-03, -1.68019402e+00, 3.80031288e-01,
7.63825774e-01, 5.21443486e-01, 6.33791029e-01,
-4.35231507e-01, -1.53082025e+00, -3.50496289e-03,
-1.23432814e-03, -7.83727646e-01, 7.39063740e-01,
2.35938102e-01, 5.02618790e-01, 1.61558226e-01,
8.54779501e-03, -1.96366489e+00, 3.68786743e-03,
-5.87294579e-01, 7.33091533e-01, -1.51233703e-01,
3.54597509e-01, -5.11431787e-03, 6.22965395e-02,
-1.09687078e+00, 8.71538401e-01, -9.85630691e-01,
-5.27073085e-01, -1.76600106e-02, -5.70782423e-01,
2.27843237e+00, -7.38709509e-01, -1.77085120e-02,
-3.12325209e-01, 1.15119267e+00, 9.21422243e-01,
-5.01486845e-02, -2.81220555e-01, 9.18304503e-01,
1.58840433e-01, 3.02477181e-01, 1.10238612e+00,
-2.99434345e-02, 4.82202709e-01, -7.59404719e-01,
-2.51899332e-01, 2.98618823e-02, 4.66805995e-01,
1.57754958e+00, -5.08729815e-01, 4.18044269e-01,
2.08715093e-03, 1.78597033e+00, 7.27036834e-01,
1.13523914e-03, -5.47239304e-01, 2.52173162e+00,
-7.67045856e-01, 2.25437546e+00, -3.34441876e+00,
7.25289015e-03, 3.72346431e-01, -3.18641844e-03,
8.30273271e-01, -1.39403379e+00, -1.73782909e+00,
1.63651240e+00, 9.60002318e-02, -6.59331143e-01,
2.62184203e-01, 2.93877861e-03, 1.31727147e+00,
1.56234789e+00, -7.51669845e-03, 3.29852216e-02,
-2.41857558e-01, 1.62216112e-01, 1.50783324e+00,
-1.60199046e+00, 5.39735079e-01, -3.20415199e-02,
4.56641568e-03, 8.09997097e-02],
[ -6.95265114e-01, 1.06402896e-02, 1.07553267e+00,
-3.15934718e-01, -5.31036913e-01, 8.72846007e-01,
-6.11480605e-03, 2.02464485e+00, 1.10527265e+00,
-1.94849484e-02, 1.32560451e-03, -9.36456211e-03,
-1.10440806e-01, 5.28583527e-01, -1.13829982e-03,
1.46456838e-01, 3.26070306e-03, -2.84898102e-01,
-3.31587911e-01, 8.17734897e-01, -5.57818055e-01,
4.32535708e-02, 4.61863399e-01, 3.34542775e+00,
1.68767142e+00, -1.97062409e-03, 1.82143543e-02,
-3.15413326e-01, 5.71750879e-01, -6.96081042e-01,
-1.86117634e-01, -8.25304985e-02, -4.60347757e-02,
1.50184110e-01, -1.63367800e-02, -6.02571387e-03,
5.49084723e-01, -1.31364334e+00, 4.08175975e-01,
5.60796298e-02, 5.27475059e-01, 3.08581138e+00,
-9.91034627e-01, -1.98629940e+00, -1.48721898e+00,
-2.13527726e-03, -8.60395491e-01, 2.66424775e-01,
9.59288955e-01, 5.66777349e-01, -3.26572001e-01,
-2.48560339e-01, -1.05838883e+00, 4.42508049e-03,
-1.51236160e-02, 7.74017394e-01, -6.52687550e-01,
-3.81333292e-01, 1.65389314e-01, 2.71702975e-01,
-7.69588398e-03, 8.82599115e-01, -4.30355454e-03,
-2.25666881e-01, 1.14588106e+00, -8.89251888e-01,
-3.86750698e-01, -1.17176091e-02, -1.88769132e-01,
-6.39712751e-01, -8.19174945e-01, -5.28638184e-01,
-2.96610524e-03, 2.48057008e-01, 4.55826133e-01,
-9.05890584e-01, -2.98752282e-02, 1.16145359e-02,
-7.98509419e-01, -1.55664492e+00, -1.57139874e+00,
2.07952082e-01, 3.07469606e+00, -1.00955224e+00,
3.82405579e-01, -4.29341346e-01, 1.33340645e+00,
3.26244794e-02, -8.75943661e-01, 5.77107549e-01,
3.27833921e-01, -4.11424369e-01, 1.66930988e-01,
5.46188891e-01, 6.75999105e-01, 1.02786946e+00,
6.28753053e-03, -1.69038594e+00, -1.28158283e+00,
-6.16281433e-03, 1.53485477e+00, -3.00859988e-01,
8.29099774e-01, 1.56167293e+00, -7.32667074e-02,
-8.09262134e-03, -1.72382131e-01, -1.17286490e-02,
8.63663405e-02, -1.17823267e+00, -3.05686109e-02,
-1.14553905e+00, 1.35298014e-01, -2.59282142e-01,
2.00424767e+00, 1.54368514e-02, 2.03050062e-01,
-1.43472648e+00, -4.66721505e-03, 3.58106126e-03,
-3.78339887e-01, -6.11038879e-02, -1.28475344e+00,
-1.35381922e-01, -2.19015336e+00, -9.49618965e-02,
9.20771249e-03, 5.62354565e-01]], dtype=float32))
(u'Model_encoding_1_128_b:0', array([ -2.64470029e+00, -6.82893500e-04, 5.08727014e-01,
-1.37868613e-01, 7.40070760e-01, 2.03183815e-01,
2.01005372e-03, -3.03202599e-01, -2.78620511e-01,
-3.10218900e-01, -3.18248244e-03, 1.19785802e-03,
9.97481309e-03, 3.04802567e-01, 2.51638703e-04,
-1.77872074e+00, 3.14007094e-03, -3.17266494e-01,
-9.37933862e-01, -6.02477610e-01, 4.23744367e-03,
-2.42934227e-01, 5.96018493e-01, -2.14682555e+00,
-4.65473205e-01, -1.81846786e-03, -2.30974564e-03,
6.30899966e-01, 6.73503041e-01, 9.31322053e-02,
8.52377236e-01, -1.11980653e+00, -2.01818407e-01,
5.03077030e-01, -9.93275121e-02, -2.14532134e-03,
-2.96053976e-01, 4.36096452e-03, -3.28072667e-01,
2.04652950e-01, 5.16486108e-01, -1.27093458e+00,
-7.88610876e-01, -2.62185305e-01, -7.87747264e-01,
-1.74406054e-03, -3.35756391e-01, 5.02550006e-01,
2.44705737e-01, 1.21884274e+00, 6.45470619e-01,
-5.73451877e-01, -8.62104833e-01, -2.33416841e-03,
1.92257273e-03, 6.36778474e-01, -3.96320432e-01,
1.39912176e+00, 8.56495678e-01, 3.56857359e-01,
1.54736126e-03, -7.60363579e-01, -9.90980654e-04,
-4.79322284e-01, -6.70671910e-02, -1.22455752e+00,
5.21773040e-01, -1.10516080e-03, 3.92556518e-01,
2.80607760e-01, 5.73509395e-01, -4.67607647e-01,
-3.64704937e-01, 1.48935452e-01, 3.60324174e-01,
-1.32981157e+00, 1.44378468e-02, 3.59932380e-03,
1.03617966e+00, -1.18284822e-01, 4.31085788e-02,
-5.44342399e-01, -4.67005759e-01, 2.74959356e-01,
6.26610994e-01, 5.10992467e-01, 7.24508166e-01,
4.35108785e-03, -4.53850269e-01, -2.02938035e-01,
-2.61952057e-02, 1.15752387e+00, 2.55401641e-01,
7.23402679e-01, 4.46678698e-01, 3.53875428e-01,
1.97228277e-04, -2.02527142e+00, 1.26925884e-02,
-1.06315117e-03, -4.47579682e-01, -4.49764043e-01,
2.31643513e-01, -1.29214191e+00, -1.40010309e+00,
1.73717597e-03, -1.61403790e-02, 1.06506585e-03,
1.02939896e-01, -1.00910105e-02, -6.99899137e-01,
1.40715614e-01, 1.67012751e-01, -3.57746363e-01,
-1.19534075e+00, -1.53669302e-04, 4.08635527e-01,
7.53610551e-01, -1.38407911e-03, 2.13723946e-02,
-6.16065562e-01, 5.20695224e-02, -4.98691946e-01,
2.35673606e-01, -1.31326807e+00, 2.20209435e-01,
6.45635242e-04, 1.48932889e-01], dtype=float32))
(u'Model_hidden_0_128_W:0', array([[ -1.79246873e-01, 1.20526090e-01, -1.36537790e+00, ...,
3.08667962e-03, -2.59987026e-01, -8.94618407e-02],
[ 1.55724939e-02, -3.93180966e-01, -2.63892293e-01, ...,
-4.23699230e-01, -3.22718412e-01, -7.01205432e-02],
[ 1.90655917e-01, -2.49632411e-02, -1.10886782e-01, ...,
4.18278575e-01, 2.51816660e-01, 2.65849948e-01],
...,
[ 1.87759921e-02, -2.81045824e-01, -1.42520428e-01, ...,
-1.70027271e-01, -1.94684207e-01, 4.37596813e-03],
[ 1.08801827e-01, -9.57067008e-04, 6.11523762e-02, ...,
4.82354611e-02, -1.78888440e-02, -3.41018513e-02],
[ 4.11317647e-02, -3.25862765e-01, -1.30445570e-01, ...,
-3.51646692e-02, -1.97259963e-01, 6.94455504e-02]], dtype=float32))
(u'Model_hidden_0_128_b:0', array([ 1.53333879, 0.70493335, -0.02480832, 0.10265222, -1.37381828,
-1.50981998, -0.05813786, 0.40179005, 0.12986852, -0.09521175,
-1.93410957, -0.82744348, -0.08553523, -0.99069744, 0.246038 ,
0.02972425, 0.32109851, 0.5145483 , -1.0021733 , -0.00587419,
-1.56762469, -0.3238658 , -1.06347001, 0.58505011, 1.4256891 ,
0.40619817, -1.07352066, -0.04242922, -1.32971537, -0.4839499 ,
-1.99757433, -1.09241009, 1.12801993, -0.24271399, 0.00811427,
0.46958497, 0.52115208, 0.69041985, 1.72831202, -1.6813122 ,
-1.51397395, 0.13560525, -0.90340465, 0.12853052, 0.12666254,
-0.25227299, -0.21032478, -0.67783618, 0.88423121, -0.48506692,
1.28678608, 0.34426832, 0.36505571, -0.29937655, -1.22181427,
0.11692158, 0.38953397, -1.22314966, 0.25278094, 1.82419455,
0.37874395, 0.40743598, 0.12966026, -0.07905538, 2.00912547,
-0.69802481, 0.07913193, 0.65208662, 0.86850512, -1.11243856,
0.36582708, -1.55625355, 0.61084205, -1.24258494, -0.70762378,
-0.09333174, -1.13205779, -0.96739906, 0.39476916, 0.04418796,
-0.16310868, -1.62712801, 1.01534712, -1.75832057, -1.12542689,
-0.93142176, -0.37041873, 0.54816276, 0.02793968, -0.86431646,
0.26327318, -0.99268472, 0.85795879, -0.87603068, 0.4949542 ,
-1.5977633 , 0.45606059, 0.07198273, 0.85201573, -0.39022085,
0.05650432, 0.46000552, -0.09588034, 0.50370234, -1.87480295,
-1.44611037, -0.20547393, 0.88748419, 0.04980588, -0.34244427,
-0.11138149, -0.45428193, -0.84380221, 0.84520793, -1.35152113,
1.29114556, -0.19931217, 0.55101472, -0.85213459, 1.92695618,
0.57688814, 0.8418287 , 0.44990751, -0.51843089, -0.52617103,
0.42566001, 0.78266329, 0.23801255], dtype=float32))
(u'Model_output_W:0', array([[ -3.13602766e-04, 1.03287364e-03, -6.91016845e-04, ...,
-1.56053953e-04, -1.11266342e-03, 2.64953909e-04],
[ 4.39034629e-05, 1.47953816e-03, -3.96795018e-04, ...,
-6.66242966e-04, 7.93447602e-04, -1.46735902e-03],
[ 3.41317477e-03, -3.00413510e-03, -8.85069021e-04, ...,
-5.77228842e-04, -3.60719569e-04, 2.01322953e-04],
...,
[ 8.51825252e-02, -5.92668504e-02, 7.49013722e-02, ...,
-1.15458027e-01, 5.10399006e-02, 4.90282714e-01],
[ 3.30569781e-03, -2.50108933e-05, 2.01914852e-04, ...,
-3.52152716e-03, -1.10524858e-03, -3.13291763e-04],
[ 3.98183428e-02, -2.44499347e-03, -2.52343323e-02, ...,
2.52014603e-02, -1.65540650e-02, -5.65229857e-05]], dtype=float32))
(u'Model_output_b:0', array([ 6.52486742e-01, -5.89411139e-01, 1.35626793e+00,
9.58649874e-01, 5.12772612e-02, -2.05808625e-01,
7.78200850e-02, 1.56964684e+00, 2.28562817e-01,
1.19905709e-03, 1.51874948e+00], dtype=float32))
(u'Reward_encoding_0_128_W:0', array([[-0.04495357, -0.2258739 , -0.21196491, ..., 0.27241901,
-0.36949 , -0.38929141],
[ 0.28503692, -0.46363541, 0.04214501, ..., -0.10383368,
0.19556317, 0.08630405],
[ 0.28403738, -0.67251885, 0.2239155 , ..., -1.02853084,
-0.19411661, -0.54719245],
...,
[-0.48497376, -1.65760469, -1.26017535, ..., 1.49813282,
-1.73668873, -0.10567269],
[-0.34971395, 1.66781056, 0.67431176, ..., -1.1829772 ,
0.80825716, -0.09159763],
[ 0.06435391, -0.09803239, 0.15726539, ..., 0.57311696,
-0.11231451, -0.22449891]], dtype=float32))
(u'Reward_encoding_0_128_b:0', array([ -1.37350142e+00, -4.96656746e-01, 7.55201042e-01,
1.83468200e-02, 6.04959369e-01, 7.42985904e-01,
-8.53587925e-01, -2.25707274e-02, -8.17373157e-01,
2.89298296e-01, 1.14356124e+00, -3.17615181e-01,
1.04457211e+00, 2.36234307e-01, 1.34612665e-01,
-5.96209049e-01, 1.01341593e+00, 5.26074529e-01,
-3.05986881e-01, -4.65963244e-01, 1.11489087e-01,
-9.34497267e-02, -6.66870326e-02, -1.68890774e+00,
-1.13117255e-01, 4.89991516e-01, 9.44740102e-02,
5.78724146e-01, -5.21995306e-01, 7.60051887e-03,
7.66861022e-01, -6.51413560e-01, 9.43910256e-02,
1.15228988e-01, 2.00586700e+00, 1.73474520e-01,
9.05389309e-01, -1.18019485e+00, 6.21977687e-01,
7.50416338e-01, -1.26198268e+00, 3.81149590e-01,
6.66517377e-01, -4.34777409e-01, -1.19776480e-01,
5.32079227e-02, -1.90325308e+00, -7.71734864e-02,
6.95870817e-02, -1.61133632e-01, -9.85787690e-01,
-1.35145411e-01, 5.10126315e-02, 1.80171502e+00,
8.33373889e-02, -1.01910487e-01, 3.97451997e-01,
3.39711696e-01, 1.19712353e+00, -6.15055442e-01,
-4.90680277e-01, 2.99385786e-01, 2.78079331e-01,
-1.49800920e+00, -9.42377031e-01, 1.12845755e+00,
-4.13977861e-01, -2.10765815e+00, 1.54722542e-01,
5.89143991e-01, -1.81278539e+00, -8.69063497e-01,
1.02837451e-01, -5.00208557e-01, -6.74904525e-01,
-1.56166291e+00, 3.26060414e-01, 6.11751974e-01,
-1.00999638e-01, 3.07470858e-01, -3.59622121e-01,
-9.34378088e-01, -7.60218382e-01, 3.85889411e-01,
-8.13448653e-02, 1.60320133e-01, 7.40625560e-01,
3.31434757e-01, 1.90244198e-01, 2.41249204e-01,
-1.13769841e+00, -1.07354188e+00, 5.47364235e-01,
-4.80955333e-01, 2.76625812e-01, -3.83826256e-01,
-6.37976885e-01, -1.09711051e+00, -7.40277886e-01,
-1.23927641e+00, -4.13531065e-01, 7.42598474e-01,
1.11630358e-01, -5.46207607e-01, -2.12871385e+00,
-2.17395926e+00, 2.15819025e+00, -9.44520414e-01,
1.41817403e+00, -1.07566273e+00, 9.43863571e-01,
-1.60501897e-01, 4.87133890e-01, -2.07815826e-01,
9.75899398e-01, 2.51363188e-01, -9.11261797e-01,
-5.44363372e-02, 3.84647250e-01, 1.30743254e-03,
-1.50678650e-01, -1.13277245e+00, 5.25112331e-01,
-1.15018122e-01, -5.32458961e-01, 8.75789642e-01,
3.02139223e-01, -4.10687596e-01], dtype=float32))
(u'Reward_encoding_1_128_W:0', array([[ -3.30105513e-01, -6.36039615e-01, -5.86322993e-02,
-8.39062482e-02, 2.26852968e-01, -1.74571478e+00,
-5.50126553e-01, -1.79329062e+00, 4.90960658e-01,
-7.91062176e-01, -1.01811998e-03, 1.03921093e-01,
-1.28930342e+00, -6.19129717e-01, 4.53096509e-01,
-9.59687173e-01, -2.16410652e-01, 2.41295606e-01,
6.25240684e-01, 2.86150561e-03, -7.39691377e-01,
-1.30649313e-01, 1.34898341e+00, 1.47920668e+00,
2.72519052e-01, -3.05515796e-01, 2.44045764e-01,
1.46247888e+00, 4.26373392e-01, 9.24911380e-01,
-1.51123613e-01, 6.65892288e-02, -3.07872649e-02,
4.76310194e-01, 1.87934637e+00, 8.51709485e-01,
-5.38388014e-01, 2.92014122e-01, -2.12277934e-01,
-1.14021385e+00, 1.17786974e-01, -7.39949286e-01,
-7.82733262e-02, 1.48829341e+00, -1.73583567e+00,
4.34519678e-01, 3.23948085e-01, -6.05024159e-01,
-9.83551085e-01, -1.15185753e-01, 1.00756967e+00,
5.28818846e-01, -9.91414309e-01, -8.00679088e-01,
7.43474245e-01, -9.06082019e-02, -1.24961054e+00,
-7.22396448e-02, -4.85742927e-01, -3.55442017e-01,
-3.85373950e-01, 4.43534046e-01, -4.51131791e-01,
-1.01700008e-01, -3.26540649e-01, -1.56834579e+00,
-4.62169379e-01, -6.03391230e-01, -1.10899067e+00,
7.11373165e-02, -1.06771089e-01, 2.93876261e-01,
2.09787726e-01, -3.73532593e-01, -3.28139395e-01,
6.04511499e-02, -4.83150035e-02, 1.22478580e+00,
-2.26191595e-01, -5.95598631e-02, 5.27305782e-01,
4.87757921e-01, -3.57241705e-02, -1.14114746e-01,
7.56533861e-01, 3.90395582e-01, -9.25070524e-01,
6.67401329e-02, 1.47593772e+00, -2.29572073e-01,
-7.11593330e-01, 1.18526208e+00, -8.53581876e-02,
-4.10536706e-01, -6.10824227e-01, 9.29961383e-01,
1.15877569e+00, 1.58064008e-01, -1.38699695e-01,
1.74799156e+00, 2.26933122e+00, 2.11859196e-01,
9.75917056e-02, -4.91787046e-02, -9.77287054e-01,
3.98894310e-01, 1.53075859e-01, -3.88554186e-02,
1.68492591e+00, 4.74772543e-01, -1.72920078e-01,
2.73950905e-01, -1.25193655e+00, -7.53024146e-02,
1.18908727e+00, -1.47141778e+00, 7.55376875e-01,
-6.77054599e-02, 1.62526727e+00, 1.14708811e-01,
6.07445002e-01, 7.93073654e-01, 7.63177813e-04,
-2.74165362e-01, -5.53924292e-02, -6.46681905e-01,
-1.74250376e+00, 9.94209528e-01],
[ -9.82767403e-01, -3.60807478e-01, -4.71949160e-01,
4.71419245e-01, 1.01820600e+00, 1.58100009e-01,
-2.08884090e-01, -6.31678283e-01, 1.39407909e+00,
-5.42533755e-01, -1.05978828e-03, -3.96763563e-01,
-5.16387284e-01, 1.68471837e+00, -4.24903154e-01,
2.43752852e-01, -3.60850960e-01, -1.35530639e+00,
-2.00992155e+00, 1.82086825e-02, 2.27867156e-01,
-3.17805827e-01, 1.41051388e+00, 4.20441151e-01,
4.33554612e-02, 7.62307718e-02, -1.08230401e-04,
4.22971457e-01, 1.62533130e-02, 8.23517680e-01,
-4.44671623e-02, 6.33021235e-01, -1.58680022e+00,
1.54954016e-01, 2.69324332e-01, 1.76910961e+00,
-9.41705763e-01, 2.36271143e+00, 8.05857256e-02,
-4.45021719e-01, 5.65619528e-01, -2.18058872e+00,
-6.26479030e-01, -8.46941888e-01, -1.19991851e+00,
-7.18339145e-01, -2.55379528e-01, 8.30818415e-01,
7.93356597e-01, 2.20266178e-01, 5.38927257e-01,
3.31927747e-01, -9.29999202e-02, 2.63332456e-01,
-3.37007821e-01, 1.05608821e+00, -6.53971910e-01,
-2.16471910e-01, -1.03505468e-02, 5.00822701e-02,
1.63817549e+00, 1.03724372e+00, -2.71049619e-01,
1.15157329e-02, 1.15732205e+00, -1.46989572e+00,
-2.90792882e-01, 2.24908304e+00, 7.00080931e-01,
1.09322619e+00, -2.96416968e-01, 1.43698823e+00,
-2.32896015e-01, -2.78855473e-01, 1.78603494e+00,
-1.25608516e+00, 2.67188579e-01, -2.00243771e-01,
-1.75702834e+00, -4.20121670e-01, 1.17579982e-01,
6.05612576e-01, -2.21324041e-01, 1.01969266e+00,
-8.43372643e-01, -7.84528732e-01, 5.40364444e-01,
3.54015715e-02, 4.65035617e-01, 1.57473207e-01,
-1.97004303e-01, -1.14469245e-01, 1.74346447e-01,
-1.73887205e+00, 5.49953699e-01, -3.20187837e-01,
5.50974011e-01, 8.66746068e-01, -2.88495868e-01,
1.97397843e-01, -8.50613534e-01, -1.34174794e-01,
4.16759849e-01, 1.37605035e+00, 1.53684568e+00,
-1.54336190e+00, -4.12350208e-01, -2.03118235e-01,
-1.16365202e-01, 6.29826486e-01, 9.06169176e-01,
3.16263497e-01, 2.34398730e-02, 7.91991115e-01,
-4.26887751e-01, 2.02318549e-01, -2.25749150e-01,
-1.11673482e-01, -1.11772025e+00, 5.27295172e-01,
-2.23664239e-01, -3.81981432e-01, -1.87653518e+00,
1.18336272e+00, -6.70069829e-02, -5.34345329e-01,
1.79232582e-01, -1.02289343e+00]], dtype=float32))
(u'Reward_encoding_1_128_b:0', array([ -3.80698949e-01, 4.03403521e-01, -1.06480770e-01,
-3.55817646e-01, 5.61034977e-01, -3.77926707e-01,
-3.46029758e-01, -9.01102662e-01, -6.84024692e-01,
1.41726828e+00, 1.53017376e-04, 1.29209828e+00,
5.31579733e-01, -1.34829533e+00, 3.92255150e-02,
-5.41211128e-01, 2.14743122e-01, 1.35271657e+00,
-3.91635269e-01, 4.88653295e-02, -4.93163586e-01,
1.77872002e-01, -2.42056513e+00, -1.40237868e+00,
1.56934619e-01, -5.74177325e-01, 6.03977323e-01,
-5.55890143e-01, -6.13032222e-01, -1.29279673e-01,
6.90375865e-02, -5.20045400e-01, -3.62008214e-01,
2.73064107e-01, -2.16728941e-01, -2.46717557e-01,
1.70901984e-01, -1.06589019e+00, 8.24979097e-02,
-5.59623003e-01, 7.08877385e-01, 1.85783997e-01,
-1.42211115e+00, -1.65263548e-01, 8.62646252e-02,
-6.44784272e-01, -5.85434139e-01, 1.21771133e+00,
-6.33729339e-01, 3.49993914e-01, 1.15926981e+00,
6.27085865e-02, 1.17206967e+00, 2.80551344e-01,
-4.82913435e-01, 7.34280288e-01, 1.17444038e+00,
4.94578369e-02, -3.98913443e-01, 2.44917586e-01,
2.16488647e+00, -2.32072279e-01, 1.71471095e+00,
-5.60355365e-01, -2.40936652e-02, -1.36255193e+00,
5.31709313e-01, -1.14013636e+00, -4.64387178e-01,
1.67284155e+00, 4.89880778e-02, 1.67914069e+00,
-1.66500461e+00, 8.61879408e-01, 5.01915216e-02,
-4.85901743e-01, -1.52325487e+00, -7.83638284e-02,
4.12313104e-01, -3.37116532e-02, -1.49174869e-01,
1.03974330e+00, -1.44842835e-02, -3.24498832e-01,
1.42391098e+00, 4.82644081e-01, 6.11678362e-01,
8.85309458e-01, -5.52326381e-01, 1.33509684e+00,
6.07821822e-01, -4.37004566e-01, 1.01868600e-01,
-1.13528095e-01, 9.04098749e-01, -2.91730501e-02,
-1.14814293e+00, -4.93561596e-01, 1.17836438e-01,
-6.11093521e-01, -1.11208642e+00, 1.32196355e+00,
3.91047299e-01, -3.61366719e-01, 1.21098530e+00,
-1.66253638e+00, 1.49367845e+00, 4.80287164e-01,
6.23208523e-01, -9.48328555e-01, -6.54915392e-01,
-5.55657633e-02, 5.07113397e-01, -4.11264241e-01,
-7.67579198e-01, 2.27416724e-01, -3.29006404e-01,
-1.08546494e-02, -1.34264302e+00, 4.72459674e-01,
6.21666729e-01, -1.58938259e-01, -3.85560840e-01,
-2.54328370e-01, 4.41727459e-01, 2.77919173e+00,
-8.08943272e-01, -1.50470161e+00], dtype=float32))
(u'Reward_hidden_0_128_W:0', array([[ 0.0634416 , 0.05927188, 0.00609335, ..., 0.05591453,
0.08308659, 0.05031575],
[ 0.1647463 , -0.0205626 , -0.01475106, ..., 0.09663454,
-0.05214785, -0.10824002],
[-0.1287742 , 0.04548202, -0.0105199 , ..., -0.03460628,
0.14820687, -0.10550293],
...,
[-0.16325432, -0.10533731, -0.05137802, ..., -0.07963935,
0.00096655, -0.02244392],
[ 0.05541919, -0.09395184, 0.03714566, ..., 0.06971982,
0.00054603, 0.10514469],
[ 0.10509292, -0.00549956, 0.11591874, ..., -0.0674852 ,
-0.0050984 , 0.11334889]], dtype=float32))
(u'Reward_hidden_0_128_b:0', array([ 0.29396594, 1.60534847, 1.34382951, -0.62399423, 0.58938283,
-0.0274007 , -0.93429029, 0.31746969, -0.92422795, 1.15510154,
1.5642879 , 0.56921077, 1.52746844, 0.8120175 , 0.8106705 ,
0.1284095 , 0.21780054, 2.11869025, 0.04110449, 0.6561138 ,
-1.49963439, 1.23437035, -0.42297184, -0.17263956, -2.70630717,
2.44387078, -1.53888834, -0.81753719, -2.6198194 , -0.72902417,
1.00552166, 0.99676901, 0.32088491, -2.65049386, 1.67350185,
0.75845546, -0.54512835, 0.95395255, 0.13547498, 0.77197075,
1.43831265, -0.62311721, 1.38975418, 1.47222424, 0.28371564,
-0.83873653, 1.07736516, -0.96660841, 0.1234365 , 0.09148801,
1.75729907, 0.33217123, 0.90918809, -0.09052911, -1.14958155,
-1.24149728, -0.30901909, 0.3708621 , 0.65369266, -0.78318566,
0.1416648 , 0.87375003, -0.04840093, 1.52184665, 0.29510921,
0.91015875, -0.46351007, -1.33889675, -0.01718464, -0.63891178,
2.6199677 , 0.01425888, 2.62288094, -0.76368958, -1.3736192 ,
-0.05107281, 1.16098928, 0.52007776, 2.74473858, -0.7672677 ,
0.13933304, 0.39815161, -1.03701687, 0.04314695, -0.39888772,
0.75698441, -1.92645586, -1.33615267, -0.43318275, -0.17049117,
0.80354953, 0.17065239, 0.45838016, -0.64068794, 1.48864484,
0.41958523, -1.11315393, -2.21892047, 0.62361479, 0.7521385 ,
0.02828376, -0.45651087, -0.25445259, 0.08542622, -0.56919158,
0.16348435, 0.41644296, 2.51529288, 2.06928968, -0.16841625,
1.33486938, 0.88539809, 1.43481469, 2.05783987, 0.78538764,
-0.22244351, -1.73748386, -0.46242401, 0.40165755, -0.36151579,
0.30762798, -0.57767653, -0.42795223, -0.2645207 , -0.27975407,
0.93569398, 0.71918571, -1.11343396], dtype=float32))
(u'Reward_output_W:0', array([[ 7.57149160e-02],
[ -9.16850913e-05],
[ -6.82629412e-04],
[ 3.53661686e-04],
[ -1.55045278e-02],
[ -7.14388007e-05],
[ 2.19638518e-04],
[ 2.07465055e-04],
[ 2.67600715e-01],
[ -1.10674082e-04],
[ 2.39915753e-04],
[ -4.33510606e-04],
[ -2.83009664e-04],
[ 5.45095420e-04],
[ -5.31189144e-04],
[ -2.39259265e-02],
[ 5.94839966e-03],
[ 2.94861675e-04],
[ 2.12579223e-04],
[ 2.75671424e-04],
[ -4.48834966e-04],
[ -1.69163831e-02],
[ -2.57683564e-02],
[ 2.31075901e-04],
[ -9.71737579e-02],
[ -4.46520783e-02],
[ -2.75705960e-02],
[ -1.36811817e-02],
[ 2.55273795e-03],
[ 1.07748367e-01],
[ -4.61704023e-02],
[ -1.35048453e-04],
[ 4.27437481e-04],
[ 7.02408403e-02],
[ -5.68137330e-04],
[ -3.25667323e-04],
[ 9.93018746e-02],
[ -5.80702035e-05],
[ -1.35753391e-04],
[ -3.18863662e-04],
[ -2.88137477e-02],
[ 5.02124727e-02],
[ -8.64240210e-05],
[ 1.85488811e-04],
[ 5.29450714e-04],
[ -1.20436875e-02],
[ -2.52094828e-02],
[ 5.15321419e-02],
[ -9.07483554e-05],
[ 4.31294262e-04],
[ -3.04353627e-04],
[ 1.16808587e-04],
[ -3.23659224e-05],
[ -1.56264432e-04],
[ -2.16783737e-04],
[ 1.51762411e-01],
[ 3.55914533e-02],
[ -2.35261425e-04],
[ -1.84782519e-04],
[ 2.95045171e-02],
[ -3.39789040e-05],
[ -1.12537513e-04],
[ -2.98856874e-04],
[ -5.00457268e-03],
[ 6.69296011e-02],
[ 8.99554864e-02],
[ -2.11321246e-02],
[ -1.89427548e-04],
[ -7.29441177e-04],
[ 2.22430273e-04],
[ 8.03233236e-02],
[ -1.81769710e-02],
[ -5.67926560e-04],
[ 3.23111570e-04],
[ -6.47674799e-02],
[ 5.54894330e-03],
[ 8.23996015e-05],
[ 4.44796577e-04],
[ 3.62192659e-05],
[ 4.40212749e-02],
[ 8.81295353e-02],
[ -2.32023333e-04],
[ 3.10158227e-02],
[ -9.71426666e-02],
[ -2.34522577e-02],
[ -2.24393458e-04],
[ 4.00376953e-02],
[ -2.67522363e-03],
[ -1.73471756e-02],
[ -8.98213548e-05],
[ 8.17274302e-02],
[ -6.37896446e-05],
[ -2.37892673e-04],
[ 5.39961766e-05],
[ 2.33064595e-04],
[ 3.81400436e-02],
[ -2.29093190e-02],
[ -2.03175414e-02],
[ 4.39305877e-05],
[ 1.87939586e-04],
[ 6.17921986e-02],
[ -3.32583711e-02],
[ -2.47689313e-04],
[ -1.92342355e-04],
[ -1.07587678e-02],
[ -1.94782508e-04],
[ -1.08559514e-04],
[ -2.14654501e-04],
[ -4.02787924e-02],
[ -7.92008825e-03],
[ -3.16954829e-04],
[ 8.29984492e-04],
[ 9.25260392e-05],
[ -6.42784755e-04],
[ 7.75823221e-02],
[ 3.32004856e-05],
[ 8.92076790e-02],
[ 2.26166025e-02],
[ 3.78691388e-04],
[ 5.65797032e-04],
[ 4.57942660e-04],
[ -3.74859956e-04],
[ -6.03851629e-04],
[ -7.22685712e-04],
[ -2.05894437e-04],
[ -2.65961047e-04],
[ -2.76742387e-04],
[ -1.05588930e-02]], dtype=float32))
(u'Reward_output_b:0', array([ 0.50232416], dtype=float32))
(u'Value_hidden_0_256_W:0', array([[ 4.85465169e-01, 1.45155326e-01, 9.03520584e-01, ...,
1.44293642e+00, 8.56368780e-01, -1.21902347e+00],
[ -3.52947205e-01, 3.46868555e-03, 7.85465166e-02, ...,
1.98520994e+00, 8.22880387e-01, -1.37416267e+00],
[ -8.93795714e-02, -2.87287086e-01, 5.04912972e-01, ...,
-3.69311757e-02, 5.96735887e-02, -3.79370421e-01],
...,
[ -1.01887298e+00, -8.68955076e-01, 5.92903733e-01, ...,
2.34178126e-01, 2.96987504e-01, -3.02151561e-01],
[ -7.22192153e-02, 1.46270299e+00, 3.71336699e-01, ...,
-1.35612166e+00, -1.67760804e-01, -9.24082298e-04],
[ 4.32945848e-01, -4.18265983e-02, 3.98784615e-02, ...,
-3.51921290e-01, 3.52190405e-01, -4.41781729e-02]], dtype=float32))
(u'Value_hidden_0_256_b:0', array([ 7.86773980e-01, 4.48494524e-01, -2.30760843e-01,
-4.16401505e-01, -6.16139710e-01, -3.79226089e-01,
-5.29969782e-02, 1.60817280e-01, 5.73480308e-01,
-6.79537058e-01, -1.58318532e+00, 2.50684410e-01,
9.23737705e-01, 1.91475302e-01, 1.92796364e-01,
-3.98156583e-01, 9.92786884e-02, -6.59377694e-01,
-1.11684442e+00, -1.55787647e+00, -2.14266241e-01,
1.62248242e+00, -1.25345159e+00, 3.80111337e-01,
-1.81244051e+00, -9.79319215e-01, 1.29731745e-01,
-7.67237067e-01, 8.77174735e-01, 1.50016451e+00,
-2.18077600e-01, -4.81146961e-01, -2.40007043e+00,
-4.50728178e-01, -4.70323086e-01, -2.67823249e-01,
4.39472467e-01, -7.36940563e-01, -9.85660076e-01,
-2.92124480e-01, -7.87844285e-02, -6.06720030e-01,
-1.59213185e+00, -8.63409564e-02, -5.06232120e-02,
-1.93865991e+00, -3.50256056e-01, -2.23125428e-01,
-2.73181289e-01, -1.06443739e+00, -1.87344208e-01,
-1.69781196e+00, 5.29848397e-01, -1.04443014e-01,
9.94768620e-01, -1.20198739e+00, -1.95182800e+00,
3.69992018e-01, -1.21458983e+00, -2.04825664e+00,
-6.73813283e-01, -2.20904082e-01, 1.49777234e+00,
4.25921200e-04, 1.10510123e+00, -6.95608437e-01,
2.04727188e-01, 1.21200597e+00, 7.12204099e-01,
-9.33072716e-02, -7.77466953e-01, -1.47419620e+00,
4.79558200e-01, 4.11826223e-02, 9.87206772e-03,
2.57637888e-01, -2.08143377e+00, -1.64398849e-01,
-3.54423821e-01, -1.46956239e-02, 1.53285861e-01,
-1.11691225e+00, -2.54227936e-01, 2.20219120e-01,
-8.96188855e-01, 8.86284187e-02, -3.59465122e-01,
-1.77282691e+00, -1.75776207e+00, -1.39123106e+00,
1.01452434e+00, 1.24706268e+00, -7.92377055e-01,
-1.11073017e+00, -1.01722908e+00, -2.09259540e-02,
-5.26356339e-01, -8.77410352e-01, -3.96863133e-01,
-2.46597037e-01, 1.95546508e-01, -8.57059658e-01,
8.26160967e-01, 4.53834504e-01, -1.07457876e+00,
3.35836679e-01, -5.27915239e-01, 5.16269505e-01,
1.40155745e+00, 1.45761311e+00, -2.74080491e+00,
1.50873080e-01, -1.92872119e+00, -4.47947025e-01,
-4.95414913e-01, -7.01483488e-01, -1.24414496e-01,
4.20246691e-01, 1.77130744e-01, 1.34709322e+00,
1.54880751e-02, -1.53210402e+00, -4.85473007e-01,
1.39782298e+00, 1.11052251e+00, 1.45161128e+00,
2.00265542e-01, -1.85754645e+00, 8.46586749e-02,
1.54716837e+00, 7.43967444e-02, 1.49700952e+00,
9.48524177e-01, 1.46163478e-01, 1.28156292e+00,
2.97467470e-01, 6.44354224e-01, 1.20353319e-01,
3.39147970e-02, 8.06491673e-02, 1.90771377e+00,
-1.45524049e+00, 2.90539414e-02, -2.79914979e-02,
-1.59827876e+00, 8.78861621e-02, -2.75817037e+00,
-4.94075477e-01, 9.03350636e-02, 1.25101209e-02,
-7.59995997e-01, -1.44280326e+00, 4.10920054e-01,
-2.82217324e-01, 1.36736393e-01, -8.47323596e-01,
3.37196022e-01, -2.66161621e-01, -1.45239770e+00,
-5.52620273e-04, -5.39439559e-01, -8.36761892e-01,
-2.65717447e-01, 2.40063906e-01, -6.60450935e-01,
5.60300410e-01, 9.36439931e-01, -1.18844569e+00,
-1.40830725e-01, -7.28080332e-01, -1.12598813e+00,
-3.20531189e-01, -7.58093297e-01, 5.01802623e-01,
1.16995919e+00, 6.93709612e-01, -1.57120720e-01,
2.98506081e-01, 1.21685171e+00, -1.72502685e+00,
-2.14071691e-01, 5.60284317e-01, -6.14801168e-01,
-4.13144827e-01, 1.22586310e+00, 6.08784020e-01,
-4.32462543e-01, 1.94796488e-01, 7.83363461e-01,
-6.81483150e-01, -5.77142596e-01, -1.19336379e+00,
-9.27566171e-01, -8.60925972e-01, 7.48686492e-01,
1.31610656e+00, 6.09057881e-02, -1.64280105e+00,
9.05732810e-01, 3.24798107e-01, -1.78995505e-01,
-3.58392149e-01, -1.98989022e+00, -7.60992110e-01,
-1.85432553e-01, -2.29074144e+00, 5.04967690e-01,
5.71351945e-02, -1.27650356e+00, -7.99050555e-02,
-4.43186844e-03, -8.28589559e-01, -1.10851383e+00,
3.91751647e-01, -4.82159317e-01, 4.08195525e-01,
4.08485532e-01, -2.54188597e-01, -7.32285380e-01,
8.01012158e-01, -1.49785161e+00, 1.09361517e+00,
8.54822099e-01, -1.53236401e+00, -1.22310495e+00,
6.08865857e-01, -4.59042013e-01, 1.47225034e+00,
1.06509827e-01, -8.79798234e-01, 3.68735939e-01,
-3.51132244e-01, 5.49741149e-01, -7.51680076e-01,
-1.11039853e+00, -1.39329410e+00, -4.11302030e-01,
-1.47532094e+00, -2.22993240e-01, 2.27925062e+00,
4.80526716e-01, 4.59567219e-01, 3.89556617e-01,
-1.61262131e+00, 4.03030664e-01, 1.35475099e-01,
7.42666900e-01, -1.25061917e+00, 7.12114453e-01,
-1.66105139e+00, 3.79364431e-01, -1.50768012e-01,
1.78101778e+00, -1.34243500e+00, 1.59680862e-02,
-6.18374228e-01], dtype=float32))
(u'Value_hidden_1_128_W:0', array([[-0.02864614, -0.03848353, 0.06328054, ..., -0.22709581,
-0.21377978, -0.09015907],
[ 0.05231847, -0.06922241, 0.04629957, ..., -0.22456619,
-0.25169072, -0.03114947],
[-0.06242988, -0.07290327, 0.1383041 , ..., 0.01805444,
0.06354379, 0.07525872],
...,
[ 0.09292363, -0.11953808, -0.12903577, ..., 0.10006614,
0.1026819 , -0.70117855],
[-0.0866784 , -0.07202286, 0.1193538 , ..., -0.22454448,
0.0298896 , -0.02124726],
[-0.09855165, -0.05625169, -0.10508455, ..., -0.23034456,
0.09285395, 0.15080838]], dtype=float32))
(u'Value_hidden_1_128_b:0', array([ 1.16615975, -0.6777342 , 0.38957295, -0.33543918, 1.0124526 ,
-0.85244089, -0.28744566, 0.51760197, -0.07591839, -0.57694519,
-0.45036805, -0.63179928, -0.81328475, 1.01230383, 0.38209462,
1.07509053, 0.62725383, -0.26245099, -0.20989731, 0.61014992,
-1.54258323, -0.49895334, 1.48269248, 1.13048649, -0.86684656,
0.25625125, 0.75746548, -0.55526233, 1.81745434, -0.97624558,
-1.45354736, -0.58171231, -0.31942946, 0.66220856, -0.39068449,
-0.03076545, -0.36633405, -1.91510189, -0.33447295, -1.04656732,
-0.37098527, -0.10888761, -0.35544091, -2.33530235, -1.35978329,
-0.38934815, 0.18549076, 0.59975719, -0.86013114, 1.2470808 ,
-1.41950214, 0.176166 , -0.47948354, -0.46813247, 0.63136369,
-1.0106467 , 0.42569721, 1.52211785, -0.61056763, 0.1358078 ,
1.13056862, -0.48816198, 0.62229598, -1.05629396, 1.89746666,
0.44927949, 0.57265437, -0.72760129, -0.73900497, 0.93846041,
-0.79651922, -0.1288763 , -1.517102 , -0.27945474, -0.95406836,
1.24284256, -0.83789855, -0.31033239, 2.18151593, -1.51365733,
0.38758305, -1.37021041, 0.02115881, -0.77115965, -0.5426873 ,
0.21902913, 0.152403 , -0.54892731, 1.02538192, 1.82474494,
-1.58177733, -0.97952837, -0.39367747, 0.6316002 , -0.7434392 ,
0.31758189, 1.79268575, -0.22081313, -0.47673747, 0.13210551,
-1.42005718, 0.02140215, -0.78823173, -1.55277467, -2.68689442,
-0.89249134, -0.36121827, -0.11190248, 1.00556135, 0.08377521,
-1.03601134, 1.01308572, 0.22550051, -0.72703141, 0.00753921,
-1.17425036, 1.41427577, 0.7844913 , -0.48050255, -0.91828901,
-0.96565086, 1.63898742, 0.55061269, 0.60412186, 0.80325598,
-0.07627103, -0.900855 , -0.61394697], dtype=float32))
(u'Value_output_W:0', array([[ -4.28925967e-04],
[ 3.84709053e-03],
[ 2.59554107e-02],
[ -1.27156153e-01],
[ 4.43048254e-02],
[ 5.83490431e-02],
[ 3.67007330e-02],
[ 1.02430458e-04],
[ -4.71225046e-02],
[ -3.47371660e-02],
[ 2.90230452e-03],
[ 1.99947786e-03],
[ -1.30034459e-04],
[ -3.87248695e-02],
[ -4.02138084e-02],
[ -7.06832216e-04],
[ -1.71161129e-03],
[ -2.55686668e-04],
[ -3.65441851e-02],
[ -6.00891970e-02],
[ -3.33356336e-02],
[ -3.71728987e-02],
[ 2.94235423e-02],
[ 1.43330987e-03],
[ -3.80548984e-02],
[ 2.83843502e-02],
[ -2.78973649e-03],
[ 2.67130556e-03],
[ -2.46769679e-03],
[ 1.86272291e-03],
[ 4.56563421e-02],
[ 2.27718614e-03],
[ -3.18179391e-02],
[ 2.40072492e-03],
[ 3.03020570e-02],
[ 4.53430926e-03],
[ 4.14018780e-02],
[ -1.89512633e-02],
[ 8.91182572e-02],
[ -3.88720422e-03],
[ 3.31139565e-03],
[ -2.45709848e-02],
[ 1.04164775e-03],
[ -3.02676652e-02],
[ 1.36570297e-02],
[ -9.30687354e-04],
[ 1.81321439e-03],
[ -2.98730582e-02],
[ -3.38991806e-02],
[ 2.82559660e-04],
[ 1.70686617e-02],
[ 1.64217420e-03],
[ 3.94805484e-02],
[ -4.12705325e-04],
[ -3.73119698e-03],
[ 1.15669034e-01],
[ -3.66762988e-02],
[ 1.90159446e-03],
[ -3.08786839e-04],
[ 4.19338839e-03],
[ -1.60288978e-02],
[ 9.70085268e-04],
[ 4.69818432e-03],
[ -3.03035900e-02],
[ -1.68927916e-04],
[ 6.86156505e-04],
[ 6.08780701e-03],
[ 1.68837365e-02],
[ -1.91891626e-01],
[ -6.30864291e-04],
[ -1.63453091e-02],
[ 3.15211900e-03],
[ 2.51133535e-02],
[ -2.11888080e-04],
[ -2.65024491e-02],
[ 1.89238542e-03],
[ 1.28084779e-01],
[ 1.31803285e-03],
[ -1.97116984e-04],
[ 3.80255431e-02],
[ 8.89180787e-03],
[ -5.76822506e-03],
[ 9.48613218e-04],
[ -3.79145099e-03],
[ 3.72269657e-04],
[ 3.11259180e-03],
[ -1.58479926e-03],
[ -3.83835398e-02],
[ 1.05129322e-02],
[ -2.51697586e-03],
[ 4.90538683e-03],
[ -3.12093403e-02],
[ -1.50639520e-04],
[ 9.33210645e-03],
[ 2.46649538e-03],
[ -5.07656150e-02],
[ 7.47916638e-04],
[ 1.91284576e-03],
[ -2.27846593e-01],
[ 7.67367929e-02],
[ -6.16137348e-02],
[ 2.31073145e-02],
[ -1.84081662e-02],
[ 3.65246763e-03],
[ 3.31364200e-02],
[ 1.88808178e-03],
[ -3.73456106e-02],
[ -3.30334005e-04],
[ 2.22709831e-02],
[ 2.75172410e-03],
[ 4.48914617e-02],
[ -2.49119895e-03],
[ 5.18952832e-02],
[ 4.05288041e-02],
[ 2.53171381e-03],
[ 3.41169513e-03],
[ 4.67089657e-03],
[ 5.54691360e-04],
[ -1.58709973e-01],
[ 1.85774993e-02],
[ -6.35804459e-02],
[ 2.00543785e-03],
[ 1.29935583e-02],
[ 1.47466199e-03],
[ 3.14103020e-03],
[ 1.29934084e-02],
[ -7.73801133e-02],
[ 2.10138150e-02]], dtype=float32))
(u'Value_output_b:0', array([-2.3759923], dtype=float32))
In [ ]:
Content source: karolkuna/reinforcement-learning
Similar notebooks: