In [1]:
%matplotlib inline
In [2]:
import gym
import tensorflow as tf
import numpy as np
import math
from tqdm import tqdm, trange
import random
import time
In [3]:
from experiencereplay import ExperienceReplay, PrioritizedExperienceReplay, ModelBasedPrioritizedExperienceReplay
from experiment import Experiment
from dmlac import DMLAC
import nn
from exploration import EpsilonGreedyStrategy, OUStrategy
In [4]:
# Run configuration for this DMLAC experiment. Every later cell reads its
# parameters from this one dictionary.
settings = {
    # Environment and training-loop schedule
    "environment": 'Reacher-v1',
    "timesteps": 10000,
    "batch_size": 64,
    "learning_start": 256,

    # Forwarded to DMLAC(...)
    "forward_steps": 1,
    "discount_factor": 0.9,
    "trace_decay": 0.5,

    # Size argument handed to the replay buffer
    "max_replay_buffer_size": 100000,

    # Learning rates for the Adam optimizers created below
    "actor_learning_rate": 0.0001,
    "model_learning_rate": 0.001,
    "reward_learning_rate": 0.001,
    "value_learning_rate": 0.001,

    # L2 settings forwarded to DMLAC (None for every component here)
    "actor_l2": None,
    "model_l2": None,
    "reward_l2": None,
    "value_l2": None,

    # Target approach rates forwarded to DMLAC
    "actor_target_approach_rate": 0.99,
    "value_target_approach_rate": 0.99,

    # Per-step work done by the training loop once learning has started
    "train_updates_per_step": 10,
    "priority_updates_per_step": 100,

    # Network shapes and activations, consumed where the networks are built
    "actor_net_layers": [256, 128],
    "actor_net_activation_fn": tf.nn.tanh,
    "actor_bounded_output": True,
    "value_net_layers": [256, 128],
    "value_net_activation_fn": tf.nn.elu,
    "model_net_embedding": 128,
    "model_net_layers": [128],
    "model_net_activation_fn": tf.nn.elu,
    "reward_net_embedding": 128,
    "reward_net_layers": [128],
    "reward_net_activation_fn": tf.nn.elu,

    # Seeds for the environment and the exploration strategy
    "environment_seed": 1,
    "noise_seed": 0,

    # GPU memory share and rendering options
    "gpu_memory_fraction": 0.1,
    "render_start": 9500,
    "render_environment": True,
    "render_frequency": 10,
}

# Output directory is unique per run: environment name plus the current Unix time.
run_timestamp = int(time.time())
settings["experiment_path"] = "experiments/experiment_dmlac_{}_{}".format(
    settings["environment"], run_timestamp)

# One Adam optimizer per trainable component, each with its own learning rate.
settings.update({
    "actor_tf_optimizer": tf.train.AdamOptimizer(settings["actor_learning_rate"]),
    "model_tf_optimizer": tf.train.AdamOptimizer(settings["model_learning_rate"]),
    "reward_tf_optimizer": tf.train.AdamOptimizer(settings["reward_learning_rate"]),
    "value_tf_optimizer": tf.train.AdamOptimizer(settings["value_learning_rate"]),
})

print(settings["experiment_path"])
experiments/experiment_dmlac_Reacher-v1_1495429650
In [5]:
def preprocess_state(observation):
    """Convert an environment observation into the state array the agent sees.

    The observation is copied into a new NumPy array. For the
    "MountainCarContinuous-v0" environment the element at index 1 is
    multiplied by 10; for every other environment the array is returned
    unmodified.

    Reads ``settings["environment"]`` from the notebook's global settings.
    """
    state = np.array(observation)
    if settings["environment"] != "MountainCarContinuous-v0":
        return state
    state[1] = state[1] * 10
    return state
def preprocess_reward(reward):
    """Hook for reward preprocessing; currently hands ``reward`` back unchanged."""
    return reward
In [6]:
# Create the Gym environment named in the settings and seed it with the
# configured environment seed.
env = gym.make(settings["environment"])
env.seed(settings["environment_seed"])

# Preprocessed first observation; `state` is the value the training loop starts from.
state = observation = preprocess_state(env.reset())
[2017-05-22 07:07:31,013] Making new env: Reacher-v1
In [7]:
# Lengths of the observation and action vectors, read from the Gym spaces.
observation_space, action_space = env.observation_space, env.action_space
state_dim = observation_space.shape[0]
action_dim = action_space.shape[0]

# Show the dimensions, the bounds of both spaces and the initial state, one per line.
print("\n".join(str(item) for item in (
    state_dim,
    action_dim,
    observation_space.high,
    observation_space.low,
    action_space.high,
    action_space.low,
    state,
)))
11
2
[ inf inf inf inf inf inf inf inf inf inf inf]
[-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf]
[ 1. 1.]
[-1. -1.]
[ 9.98110818e-01 9.99995795e-01 6.14393678e-02 2.90001033e-03
1.09008655e-01 -1.79268843e-01 -4.30473791e-03 1.51751220e-04
1.00574556e-01 1.92489481e-01 0.00000000e+00]
In [8]:
# Limit this process to the configured fraction of GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=settings["gpu_memory_fraction"])
session_config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.InteractiveSession(config=session_config)

# Summary events are written below the experiment directory.
summary_logdir = settings["experiment_path"] + "/logdir"
summary_writer = tf.summary.FileWriter(summary_logdir)
In [9]:
# Action bounds handed to the actor network, or None when bounding is disabled.
actor_output_bounds = env.action_space if settings["actor_bounded_output"] else None

# NOTE(review): the last positional flag is True only for the actor. The printed
# actor graph contains bn_* layers, so it presumably toggles batch
# normalisation; confirm against the nn module.
actor_network = nn.fully_connected(
    "Actor",
    session,
    [state_dim],
    settings["actor_net_layers"],
    action_dim,
    settings["actor_net_activation_fn"],
    actor_output_bounds,
    True
)

# The model network takes (state, action) inputs and has state_dim outputs.
model_network = nn.fully_connected_with_input_embedding(
    "Model",
    session,
    [state_dim, action_dim],
    settings["model_net_embedding"],
    settings["model_net_layers"],
    state_dim,
    settings["model_net_activation_fn"],
    None,
    False
)

# The reward network takes (state, action) inputs and has a single output.
reward_network = nn.fully_connected_with_input_embedding(
    "Reward",
    session,
    [state_dim, action_dim],
    settings["reward_net_embedding"],
    settings["reward_net_layers"],
    1,
    settings["reward_net_activation_fn"],
    None,
    False
)

# The value network takes a state input and has a single output.
value_network = nn.fully_connected(
    "Value",
    session,
    [state_dim],
    settings["value_net_layers"],
    1,
    settings["value_net_activation_fn"],
    None,
    False
)

print(str(actor_network))
[] --> Actor_input_0
['Actor_input_0'] --> bn_hidden_0
['bn_hidden_0'] --> hidden_0_256
['hidden_0_256'] --> bn_hidden_1
['bn_hidden_1'] --> hidden_1_128
['hidden_1_128'] --> bn_output
['bn_output'] --> output
['output'] --> bounding
In [10]:
# Each keyword DMLAC receives from the configuration has the same name as its
# settings key, so collect them in one pass instead of spelling each out twice.
agent_setting_names = (
    "forward_steps",
    "discount_factor",
    "trace_decay",
    "actor_tf_optimizer",
    "model_tf_optimizer",
    "reward_tf_optimizer",
    "value_tf_optimizer",
    "actor_l2",
    "model_l2",
    "reward_l2",
    "value_l2",
    "actor_target_approach_rate",
    "value_target_approach_rate",
)
agent_kwargs = {name: settings[name] for name in agent_setting_names}

# The agent combines the four networks built earlier with those hyper-parameters.
agent = DMLAC(actor_network, model_network, reward_network, value_network,
              summary_writer=summary_writer, **agent_kwargs)
In [11]:
# Create a Saver for the graph built so far (`saver` is not used again in the
# cells visible here).
saver = tf.train.Saver()
# Hand the session's graph to the summary writer. This runs after the Saver is
# created, so keep the order of these two statements.
summary_writer.add_graph(session.graph)
In [12]:
# Replay buffer tied to the agent and environment, sized by
# settings["max_replay_buffer_size"].
# NOTE(review): the meaning of the trailing False is not visible in this
# notebook; confirm against ModelBasedPrioritizedExperienceReplay.
experience_replay = ModelBasedPrioritizedExperienceReplay(agent, env, settings["max_replay_buffer_size"], False)
In [13]:
# Exploration policy used by the training loop to pick actions; it receives
# the agent, the environment and settings["noise_seed"].
exploration_strategy = EpsilonGreedyStrategy(agent, env, settings["noise_seed"])
In [14]:
# Mid-run environment switch: at ENV_SWITCH_STEP the environment is re-seeded,
# the replay buffer is replaced by an empty one and training pauses until
# POST_SWITCH_LEARNING_START.
# NOTE(review): presumably this measures how the agent adapts to a changed
# task; confirm the intent.
ENV_SWITCH_STEP = 5000
ENV_SWITCH_SEED = 0
POST_SWITCH_LEARNING_START = 5100

exp = Experiment(settings["experiment_path"], session, env, settings,
                 settings["render_environment"], settings["render_frequency"],
                 settings["render_start"])
progress_bar = tqdm(total=settings["timesteps"])
e_t = 0  # steps taken since the last environment reset
try:
    for t in xrange(settings["timesteps"]):
        e_t += 1

        # Exploration decays as ((T - t) / T) ** 4 over the run, but is pinned
        # to 1 for every step before learning starts.
        exploration = ((settings["timesteps"] - t) / float(settings["timesteps"])) ** 4
        if t < settings["learning_start"]:
            exploration = 1

        # Act, observe the transition and store it in the replay buffer.
        action = exploration_strategy.action(state, exploration)
        observation, reward, done, info = env.step(action)
        next_state = np.reshape(preprocess_state(observation), (state_dim,))
        # NOTE(review): the trailing float("inf") looks like an initial priority
        # for the new sample; confirm against add_experience.
        experience_replay.add_experience(state, action, preprocess_reward(reward), next_state, done, float("inf"))

        # Errors reported by the replay buffer for the transition just added.
        td_error = math.fabs(experience_replay.get_last_td_error())
        model_error = experience_replay.get_last_model_error()
        reward_error = experience_replay.get_last_reward_error()
        exp.record(t, state, action, reward, next_state, done, td_error, model_error, reward_error)
        state = next_state

        # One-off environment switch. This bookkeeping is kept out of the
        # generic episode-end branch below, so an ordinary episode termination
        # neither wipes the replay buffer nor postpones learning.
        if t == ENV_SWITCH_STEP:
            env.seed(ENV_SWITCH_SEED)
            done = True  # force a reset so the new seed takes effect
            experience_replay = ModelBasedPrioritizedExperienceReplay(
                agent, env, settings["max_replay_buffer_size"], False)
            # The shared settings entry is overwritten on purpose: later cells
            # read settings["learning_start"] for their skip_steps argument.
            # Re-running this cell without re-running the settings cell
            # therefore starts with the post-switch value.
            settings["learning_start"] = POST_SWITCH_LEARNING_START

        # Episode end, natural or forced: start a new episode.
        if done:
            observation = env.reset()
            state = preprocess_state(observation)
            e_t = 0

        # Train the agent and refresh priorities once learning has started.
        if t >= settings["learning_start"]:
            experience_replay.train_agent(settings["batch_size"], settings["train_updates_per_step"])
            experience_replay.update_oldest_priorities(settings["priority_updates_per_step"])

        progress_bar.set_description(
            '[{}][{}] reward: {:.2f}, reward 100MA: {:.2f}, Exploration: {:.2f}, action: {}, td-error: {:.4f}, model error: {:.4f}, reward error: {:.4f}, ep_reward: {}, ep_dur: {}'
            .format(t, e_t, reward, exp.reward_100ma.get_average(), exploration, str(action),
                    td_error, model_error, reward_error,
                    exp.last_episode_reward, exp.last_episode_duration))
        progress_bar.update()
finally:
    # Close the bar even when the loop is interrupted or raises, so a stale
    # progress bar is not left behind in the notebook output.
    progress_bar.close()
[9509][4509] reward: -0.02, reward 100MA: -0.02, Exploration: 0.00, action: [ 0.00708318 0.00451113], td-error: 0.0232, model error: 0.0033, reward error: 0.0000, ep_reward: 0, ep_dur: 0: 95%|█████████▌| 9510/10000 [30:52<01:30, 5.39it/s] 00:00<1:25:37, 1.95it/s][2017-05-22 07:38:27,378] GLFW error: 65544, desc: X11: RandR gamma ramp support seems broken
[9999][4999] reward: -0.02, reward 100MA: -0.02, Exploration: 0.00, action: [-0.00806123 -0.00390194], td-error: 0.0000, model error: 0.0024, reward error: 0.0000, ep_reward: 0, ep_dur: 0: 100%|██████████| 10000/10000 [32:24<00:00, 5.26it/s]
In [15]:
# Persist the recorded experiment, then report the directory it belongs to.
exp.save()
saved_message = "Experiment results saved in " + exp.path
print(saved_message)
Experiment results saved in experiments/experiment_dmlac_Reacher-v1_1495429650
In [16]:
# Plot the cumulative reward via the Experiment helper; the cell output is the
# list of Line2D handles the call returns.
exp.plot_cumulative_reward()
Out[16]:
[<matplotlib.lines.Line2D at 0x7f2786c60e90>]
In [17]:
# Plot the reward via the Experiment helper; the cell output is the list of
# Line2D handles the call returns.
exp.plot_reward()
Out[17]:
[<matplotlib.lines.Line2D at 0x7f2786c60e50>]
In [18]:
# Plot the TD error passed to exp.record() during training.
exp.plot_td_error()
Out[18]:
[<matplotlib.lines.Line2D at 0x7f2786c60e10>]
In [19]:
# Plot the model error, passing settings["learning_start"] + 10 as skip_steps.
# The training loop overwrites settings["learning_start"] with 5100 at step
# 5000, so after a full run skip_steps is 5110 rather than 266.
exp.plot_model_error(skip_steps=settings["learning_start"]+10)
Out[19]:
[<matplotlib.lines.Line2D at 0x7f278c20f810>]
In [20]:
# Plot the reward error, passing settings["learning_start"] + 10 as skip_steps.
# The training loop overwrites settings["learning_start"] with 5100 at step
# 5000, so after a full run skip_steps is 5110 rather than 266.
exp.plot_reward_error(skip_steps=settings["learning_start"]+10)
Out[20]:
[<matplotlib.lines.Line2D at 0x7f27fc031a50>]
In [21]:
# Plot the episode reward via the Experiment helper.
exp.plot_episode_reward()
Out[21]:
[<matplotlib.lines.Line2D at 0x7f278c3f2690>]
In [22]:
# Plot the episode duration via the Experiment helper.
exp.plot_episode_duration()
Out[22]:
[<matplotlib.lines.Line2D at 0x7f278c0be190>]
In [23]:
# Only build the GIF when rendering was enabled in the settings.
if settings["render_environment"]:
    exp.display_frames_as_gif()
In [24]:
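# session.close()  # NOTE(review): left disabled, presumably because the next cell still reads variable values through the open session; confirm, then close the session once nothing else needs it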
In [25]:
# Dump every TensorFlow variable as a (name, value) pair via the Experiment
# helper. The output is very long, since it includes each weight matrix and
# bias vector.
exp.print_all_tf_variables()
(u'Actor_hidden_0_256_W:0', array([[ -1.59200519e-01, 7.07406342e-01, -2.57752270e-01, ...,
4.30091560e-01, 2.84444809e-01, -1.11342919e+00],
[ 3.61775428e-01, -6.28978945e-04, 4.85315114e-01, ...,
4.24738735e-01, -2.29170427e-01, 9.93753448e-02],
[ 2.30749231e-02, 8.68954509e-02, -5.38680479e-02, ...,
-2.02614397e-01, -9.76214260e-02, -7.37709180e-02],
...,
[ -5.45451045e-01, -3.80574584e-01, 5.70143223e-01, ...,
-3.40593845e-01, -3.01140726e-01, -7.33810961e-01],
[ 5.15907928e-02, 6.47842824e-01, -5.72929978e-01, ...,
-2.47253757e-02, 6.46479055e-02, -7.15157807e-01],
[ 2.57557988e-01, 3.82447213e-01, 3.81117798e-02, ...,
-3.76374125e-01, -2.43600562e-01, -3.93628061e-01]], dtype=float32))
(u'Actor_hidden_0_256_b:0', array([ -1.69770741e+00, 1.32229924e-01, 5.87846458e-01,
-6.46643758e-01, -2.44890615e-01, 1.26019931e+00,
8.03609431e-01, -8.53722915e-02, 1.22035250e-01,
6.06763542e-01, -3.71667117e-01, 1.61848855e+00,
1.18660629e+00, -3.12931508e-01, 7.68407226e-01,
-1.25676346e+00, -2.90155023e-01, 3.79153609e-01,
5.16847193e-01, -1.68643296e+00, -1.23086071e+00,
-4.74294811e-01, -6.09741509e-01, 5.92775829e-02,
-2.03893518e+00, 1.50752664e+00, -3.39959329e-03,
-2.15486959e-01, 3.10510755e-01, -3.69401336e-01,
1.98609084e-01, -6.65316805e-02, -2.10816693e+00,
5.06912887e-01, 8.49014103e-01, -5.50775230e-01,
-6.74520135e-01, 8.59201789e-01, 9.33341607e-02,
-7.73510814e-01, -1.19000208e+00, -8.18597004e-02,
1.55856639e-01, -1.15373087e+00, -4.94192481e-01,
1.30048543e-01, 7.23778129e-01, 2.85335031e-04,
-1.36957920e+00, -9.31482136e-01, -7.48140395e-01,
-7.94343054e-01, -2.52137572e-01, -1.52218354e+00,
2.51556635e+00, -5.26907332e-02, -8.76020372e-01,
2.04678938e-01, 3.42346787e-01, -1.11057484e+00,
-2.31888235e-01, 7.59059250e-01, -7.33265102e-01,
3.91362816e-01, 3.82247657e-01, -1.00127184e+00,
-1.28649354e-01, 2.16457948e-01, -1.37603414e+00,
2.35273147e+00, -6.31437302e-02, -1.55156696e+00,
-5.27567863e-01, 5.04798532e-01, 9.16516840e-01,
2.81773567e-01, 2.00918865e+00, 1.62537563e+00,
1.01146972e+00, 1.14520228e+00, 1.01766035e-01,
1.76749694e+00, 7.22903073e-01, 1.52985287e+00,
-1.36360955e+00, 6.40647411e-01, -9.28067863e-01,
-2.65697449e-01, -1.90478325e-01, -6.06039345e-01,
-2.83755684e+00, -6.07533276e-01, 8.29173326e-01,
5.37379920e-01, 4.96817380e-01, 2.39048496e-01,
-4.07642692e-01, -1.23019290e+00, 6.06707752e-01,
2.81685770e-01, 1.31827557e+00, -5.29869437e-01,
-9.30823833e-02, -6.03818893e-01, 1.11578465e+00,
-1.12926805e+00, -9.24357533e-01, -2.93122441e-01,
1.34150541e+00, -6.31969154e-01, -3.75775963e-01,
6.30696356e-01, 8.41134250e-01, -2.14407015e+00,
6.05598152e-01, -1.50329566e+00, -7.57039011e-01,
-2.39552662e-01, -5.24222493e-01, -1.73201120e+00,
7.20339358e-01, 1.92708647e+00, 6.24997355e-02,
-1.01489961e+00, -3.40879649e-01, -1.81460273e+00,
-2.27071494e-01, -1.06489325e+00, -9.18014050e-01,
-4.31327611e-01, -3.99435788e-01, -1.43082845e+00,
-2.38414645e+00, -1.48123884e+00, 1.30674386e+00,
6.40882373e-01, -1.38795114e+00, 6.69498920e-01,
-2.98614204e-01, 1.96600044e+00, -2.98854381e-01,
-1.87599123e-01, -1.22619438e+00, 4.09008026e-01,
-6.31369293e-01, -4.54778016e-01, -9.42614377e-01,
-9.99396026e-01, -6.08230792e-02, -8.93492222e-01,
-8.81218314e-01, -1.40675259e+00, -6.98424697e-01,
-2.09527612e+00, -2.74604440e-01, 1.57350093e-01,
2.82130800e-02, 6.42962635e-01, -1.00219107e+00,
1.84112549e+00, 1.57546675e+00, -5.39850116e-01,
-1.29108870e+00, -1.91095740e-01, -1.58786750e+00,
-6.57051325e-01, -2.74895206e-02, -8.73202145e-01,
-7.31165230e-01, -1.06858778e+00, 1.53602290e+00,
8.14533308e-02, -2.87707239e-01, -1.53154278e+00,
-6.90541387e-01, 8.79594982e-02, -2.18290734e+00,
-3.24461520e-01, -5.51298618e-01, 4.00012851e-01,
1.52237606e+00, 8.35713029e-01, -5.26777625e-01,
-2.62374908e-01, 4.89224374e-01, -9.60011363e-01,
3.36109966e-01, -4.30986844e-02, -6.28874123e-01,
-3.82899791e-01, -8.56480673e-02, 1.01861727e+00,
-1.12022793e+00, -1.72108188e-01, -1.08481956e+00,
1.44620693e+00, -1.11856663e+00, -3.00550431e-01,
-1.35100707e-01, -1.19417086e-01, -5.35012424e-01,
-1.00218916e+00, 1.05756867e+00, 1.20588970e+00,
5.93243957e-01, 2.08948314e-01, 1.21939719e+00,
-1.17459960e-01, 8.61894488e-02, -5.85070789e-01,
-1.34825945e+00, 1.27085137e+00, 8.64821613e-01,
7.28441894e-01, -4.60257381e-01, -4.99435306e-01,
-1.20352542e+00, 1.72608054e+00, -9.44004416e-01,
-9.03090894e-01, 5.72560728e-01, -5.76742828e-01,
-4.87091750e-01, -3.55694354e-01, 1.47339571e+00,
1.00148880e+00, 2.04609022e-01, -7.92925417e-01,
-4.48416919e-01, -4.45881456e-01, 3.03356886e-01,
1.81258678e+00, -3.82268697e-01, 1.47402778e-01,
-1.18300891e+00, -7.47193694e-01, 4.69499350e-01,
2.75911719e-01, 7.47999489e-01, 1.91108811e+00,
-7.98703372e-01, -1.42233527e+00, 2.85179564e-03,
-1.37224233e+00, -1.18782021e-01, -1.98223197e+00,
1.06712222e+00, 2.06976771e+00, 1.71818817e+00,
1.09067893e+00, 4.81112957e-01, 2.46687269e+00,
5.61092615e-01, 1.06760359e+00, -6.19043522e-02,
-1.48465705e+00], dtype=float32))
(u'Actor_hidden_1_128_W:0', array([[-0.07560842, 0.03308789, 0.10243797, ..., -0.09710516,
-0.0222379 , 0.08200189],
[-0.03448282, 0.00893423, 0.01769803, ..., -0.02288969,
-0.10803212, 0.03134444],
[ 0.05989021, -0.08696841, -0.19518466, ..., -0.06357884,
-0.03444228, 0.05066103],
...,
[ 0.00172155, -0.02993508, -0.05935157, ..., -0.06495619,
0.0650861 , -0.00370723],
[-0.00151722, -0.09723307, 0.16061948, ..., 0.11217 ,
0.03351741, 0.0310208 ],
[ 0.0632278 , 0.06207932, -0.0459808 , ..., -0.03411122,
0.02335089, 0.12366739]], dtype=float32))
(u'Actor_hidden_1_128_b:0', array([-0.13225348, -0.77602279, 0.6239872 , 1.12494278, -2.11821079,
-0.04306398, 0.52565539, 0.06562035, -0.79218757, 0.02169892,
-0.67759007, 1.20875299, 1.60540688, 2.22248125, 0.11493775,
-0.73627597, 0.39726618, 0.66968703, 0.7883569 , -0.52461028,
-1.97558534, -0.77395976, 0.58421677, -1.32164204, 0.92347383,
-0.05431734, 0.47732654, 0.80318189, 0.47714016, 1.36182427,
0.29976445, -1.58954525, 0.60293895, 0.62942505, -0.31681871,
-0.81050581, 0.8433814 , 0.80523372, -0.09678968, 0.72465014,
0.24947299, 0.89292282, 0.38702017, 0.37290511, 0.35648689,
-0.56402403, 1.51802158, -1.65834498, -1.49984968, 0.30902156,
-1.58278108, -1.42345345, -0.52369577, 0.10898788, -0.56730312,
1.13388193, 0.09305318, -0.52120864, -0.15591003, -0.79088306,
-0.71379679, -0.7511766 , -1.16287708, -1.00548828, 1.25988793,
0.2324394 , 0.04563659, 0.56432271, -0.47755015, -1.00443554,
2.35392046, -0.75343174, -0.52096075, 1.03530324, -2.0628581 ,
-1.92618644, 0.40230605, -1.20537865, -2.06665516, 1.14448297,
-0.15574856, -1.14810193, 1.78216171, -1.59812975, -0.13451104,
0.66924417, 0.05000842, 0.23313372, -0.93109441, -1.32002068,
0.80643189, -1.20479703, 1.7816484 , 0.60448867, -0.0307741 ,
-0.01047726, 0.33662856, 0.76727623, 0.99912024, -1.39571345,
0.23337935, 0.41480714, 0.86130846, -1.08527517, -0.5490163 ,
-0.40281561, -0.39271334, 1.3453095 , 0.18047607, -0.50567633,
-1.46273363, -0.00514394, 0.33859152, 1.12261128, -0.38688487,
-0.5249511 , 0.42976189, 0.74103993, 0.06782812, 1.43882072,
1.28268731, 1.35212982, 0.71743703, -0.6992594 , -0.98381442,
-0.49026796, -0.44366968, 1.01984668], dtype=float32))
(u'Actor_output_W:0', array([[ 0.06719612, 0.03344682],
[ 0.01645906, -0.06311733],
[-0.02546391, 0.090141 ],
[-0.07495835, -0.08572064],
[ 0.01023509, 0.02632931],
[-0.12002798, 0.09810884],
[ 0.02279054, -0.20053859],
[ 0.08976113, 0.06532629],
[ 0.05060269, 0.08270789],
[-0.03589604, 0.03258369],
[ 0.01200086, -0.0980861 ],
[-0.1194201 , 0.03486066],
[ 0.14185622, -0.04989427],
[ 0.06858592, -0.14059854],
[ 0.08942389, -0.05581468],
[ 0.08882316, 0.04374557],
[ 0.0740087 , 0.0285357 ],
[ 0.00865029, 0.0568868 ],
[-0.02754414, 0.00881875],
[-0.02968123, -0.00116999],
[ 0.05443652, 0.13276167],
[-0.15532297, -0.0717786 ],
[ 0.13742189, -0.02615428],
[-0.0207289 , -0.1960399 ],
[-0.04083242, 0.24379633],
[ 0.00205377, -0.04203631],
[-0.07051419, -0.08239187],
[-0.0628883 , 0.05636936],
[ 0.00128713, 0.06775969],
[ 0.03856419, 0.10722463],
[ 0.12135898, -0.06947833],
[-0.10704261, 0.02116884],
[ 0.06693276, -0.01448391],
[ 0.0184346 , 0.12683849],
[-0.10435172, -0.07030713],
[-0.01833918, -0.00775422],
[ 0.05430368, 0.04699774],
[-0.06866384, -0.04581886],
[ 0.10124458, -0.04579908],
[-0.10284933, -0.06646922],
[ 0.03551849, -0.04490092],
[-0.04701975, 0.06857959],
[-0.10003398, 0.05312582],
[-0.03809109, 0.0099235 ],
[ 0.05424875, -0.06061707],
[ 0.00874255, -0.01974282],
[-0.12243922, 0.02547248],
[ 0.05868348, -0.0378463 ],
[ 0.01942744, -0.04123366],
[-0.03677591, -0.08372483],
[ 0.08757161, 0.03770398],
[ 0.07471299, 0.06485239],
[ 0.03399199, 0.02483624],
[ 0.16810912, 0.17832783],
[-0.02823913, -0.02419134],
[ 0.06049757, -0.10403246],
[ 0.06938455, 0.00543582],
[ 0.02749696, 0.06067374],
[-0.0546722 , 0.13998985],
[-0.0542739 , 0.0222363 ],
[-0.05522741, 0.00762491],
[-0.00922052, -0.05235448],
[ 0.15236834, 0.05194926],
[ 0.04402372, 0.21257551],
[ 0.0170405 , 0.09264185],
[-0.05484943, 0.03147439],
[-0.03064769, 0.01186323],
[ 0.06199154, -0.05995285],
[-0.03303626, -0.12149513],
[-0.04742181, -0.041878 ],
[ 0.07408556, -0.05820363],
[-0.08302434, -0.00712981],
[-0.00504689, -0.0045288 ],
[-0.03679478, -0.03427048],
[ 0.07559698, 0.05492543],
[ 0.00526755, -0.05552596],
[ 0.01783131, 0.03855588],
[-0.02056941, -0.06913704],
[ 0.11858123, -0.20397171],
[ 0.08401693, -0.07499496],
[ 0.06082063, -0.03002699],
[ 0.11784724, 0.14664398],
[-0.0871404 , 0.11283079],
[-0.07119913, -0.1153825 ],
[ 0.00052665, -0.00030757],
[-0.03195947, -0.08868734],
[ 0.00309316, -0.00106493],
[-0.10633682, 0.1306349 ],
[ 0.07192399, 0.00375487],
[ 0.00359101, -0.02189568],
[ 0.1681025 , 0.17188008],
[-0.06937709, 0.14312072],
[-0.07049862, -0.02240451],
[ 0.19120398, -0.02311124],
[-0.02317316, 0.00095879],
[-0.05044302, 0.00690788],
[-0.00828584, -0.04614935],
[ 0.07845294, 0.10364811],
[ 0.047111 , -0.02047509],
[ 0.11645436, -0.05246005],
[ 0.0005702 , -0.00073004],
[-0.05870356, -0.01717452],
[ 0.057774 , 0.12961584],
[ 0.12094188, 0.12951465],
[ 0.1104091 , 0.01325048],
[ 0.08282439, 0.11192418],
[-0.0976658 , 0.01989834],
[-0.02941928, 0.04859546],
[-0.02075154, 0.05480024],
[-0.04954129, 0.02070877],
[ 0.11965461, 0.11796667],
[-0.06582583, -0.04739656],
[ 0.07564461, -0.02677999],
[-0.1083236 , -0.02787597],
[-0.08409841, -0.04502684],
[-0.02686496, -0.14027761],
[-0.0597458 , 0.06177064],
[-0.06548636, -0.01040875],
[-0.05331907, 0.06835786],
[-0.09520576, -0.20798358],
[-0.04043613, -0.25441945],
[ 0.0126388 , -0.02219233],
[-0.10274597, 0.06933706],
[ 0.05553005, -0.02139485],
[ 0.08692277, 0.13711189],
[-0.01838638, 0.0729055 ],
[ 0.05711128, -0.09383 ],
[ 0.03767119, 0.00766797]], dtype=float32))
(u'Actor_output_b:0', array([ 0.73959541, -0.06539465], dtype=float32))
(u'Model_encoding_0_128_W:0', array([[ 0.69518381, 0.28459105, -1.20984602, ..., -0.42699695,
-0.53744906, -0.86097789],
[ 0.25091755, -0.50975102, 1.14339221, ..., 0.67765826,
-0.26473135, -0.04183073],
[-0.54383576, 0.65309733, 0.08241607, ..., -2.03021789,
-0.6905005 , 0.32549372],
...,
[-0.48269087, -0.18191172, 0.70629668, ..., 0.61481202,
0.31247672, 0.26589 ],
[-0.11139401, -0.02847646, -0.0112994 , ..., 0.83747524,
-0.60932291, -0.22928904],
[-0.03754782, -0.02803496, -0.29735637, ..., -0.15299448,
-0.13218565, 0.16234143]], dtype=float32))
(u'Model_encoding_0_128_b:0', array([ -4.28694040e-01, -2.74977069e-02, -1.74912989e+00,
-8.28655899e-01, -2.82251865e-01, -1.35869086e-01,
-2.20593429e+00, -1.35397160e+00, -6.34484351e-01,
7.49152422e-01, -8.31321061e-01, 1.51466107e+00,
-1.63956845e+00, -7.37591445e-01, 9.11748707e-01,
-2.11833072e+00, -9.46126938e-01, -3.65781039e-02,
-5.07870801e-02, -1.22573507e+00, -1.63754821e-01,
-1.47380757e+00, 3.93667400e-01, -7.37642109e-01,
-1.80934823e+00, -5.96334375e-02, -2.01744616e-01,
3.46266419e-01, -7.30175853e-01, -1.72702360e+00,
1.14971912e+00, -7.44519114e-01, -6.80421218e-02,
-1.62186480e+00, 3.13054979e-01, -1.11275303e+00,
1.77553132e-01, -1.83309078e+00, 5.42039812e-01,
-2.25263923e-01, 3.84358317e-02, -3.31565529e-01,
4.46954668e-01, 7.83869531e-03, 1.82833672e-01,
-1.90974391e+00, -1.62187326e+00, -8.34402978e-01,
6.52829744e-03, -1.59955180e+00, -4.76934388e-02,
-2.38911605e+00, 1.73220229e+00, 6.74384907e-02,
5.88770986e-01, 1.28624773e+00, 1.22617221e+00,
8.93509030e-01, -1.63886487e+00, 9.14223909e-01,
-8.59100819e-01, -1.62574029e+00, 2.35070157e+00,
-6.75485611e-01, 2.70809829e-01, 1.61744261e+00,
-2.29331684e+00, -1.46213245e+00, 3.29050064e-01,
3.14883977e-01, -2.90406728e+00, -1.44672573e+00,
-5.07438362e-01, -1.21094191e+00, -2.11887434e-03,
3.99132401e-01, -1.11395597e+00, -2.36560881e-01,
-1.19139421e+00, -3.78835142e-01, 1.15997732e+00,
3.93993616e-01, -8.42029214e-01, -7.33429193e-01,
-1.07543862e+00, -3.16783786e-01, -4.09674168e-01,
-6.39162004e-01, -1.16024446e+00, -2.15204644e+00,
-7.47263491e-01, -1.12933755e+00, 1.84869274e-01,
-6.36922494e-02, 6.70756400e-01, 1.60401449e-01,
-1.47027981e+00, 1.99212432e+00, -1.17621887e+00,
3.49577129e-01, -4.07329142e-01, -1.01257372e+00,
1.05123851e-03, 1.41816393e-01, -7.21713662e-01,
-5.15124500e-01, -1.03009343e+00, -2.37993503e+00,
2.76494813e+00, 1.89254105e-01, -6.53514862e-01,
8.31958428e-02, 3.62908989e-02, -5.70600271e-01,
-1.10946035e+00, -7.84381866e-01, 2.53147340e+00,
-5.94010234e-01, 7.32733667e-01, 2.93406814e-01,
1.04726517e+00, -1.41966403e-01, 9.38850522e-01,
1.95187286e-01, 2.45171666e+00, -1.33682108e+00,
7.00154722e-01, 1.61046028e-01], dtype=float32))
(u'Model_encoding_1_128_W:0', array([[ 3.21138263e-01, -1.33093908e-01, 2.20629126e-01,
-1.26325630e-03, -5.37024081e-01, -1.58974099e+00,
-3.12262356e-01, -2.44659975e-01, -4.37725931e-01,
2.92956918e-01, -6.07213318e-01, 1.00799196e-03,
2.08344460e-02, 5.53831935e-01, 4.24227238e-01,
2.40737006e-01, 5.58335245e-01, 2.42304802e-01,
2.43565226e+00, 1.08438730e-01, 2.06174582e-01,
-1.20062900e+00, 4.45884645e-01, -2.61662275e-01,
-1.03413403e+00, -1.18550968e+00, -7.74058700e-01,
-9.34876740e-01, 8.76336575e-01, -4.19491120e-02,
-1.52341330e+00, 5.47529876e-01, -1.31448531e+00,
6.61573038e-02, 1.59455813e-03, 6.94824988e-03,
-4.88397926e-01, -3.50562364e-01, -4.33493078e-01,
-6.16130292e-01, 5.02186298e-01, -4.37156707e-01,
4.12623910e-03, 1.10280108e+00, -6.53218567e-01,
6.50488913e-01, -3.86088431e-01, -3.93712014e-01,
-1.71983027e+00, 2.48376563e-01, 6.94252900e-04,
-6.20687706e-03, 9.76549163e-02, -6.80926204e-01,
1.30759239e+00, -8.98431897e-01, -4.19284159e-04,
6.80821687e-02, 7.97504902e-01, 1.21342039e+00,
1.97687531e+00, 4.66637081e-04, 2.27095619e-01,
3.36764693e-01, -1.03263807e+00, -3.17672670e-01,
4.84105200e-03, -1.92747474e+00, -1.81940293e+00,
-3.25714052e-01, 1.40071571e+00, -4.54541326e-01,
2.06346011e+00, 1.13147926e+00, -1.41757175e-01,
-1.60693455e+00, 3.86190205e-03, -1.80094409e+00,
-4.61166233e-01, -9.67485547e-01, -3.56684253e-02,
1.54268432e+00, -1.74973512e+00, -1.80968165e-01,
-5.63972414e-01, 1.05592227e+00, 2.07540184e-01,
-5.45271635e-01, 1.53429478e-01, 2.34005392e-01,
-1.68375993e+00, 9.80935097e-02, -1.90326810e-01,
9.63792379e-04, 3.97171862e-02, -8.65548998e-02,
-3.56637985e-01, 1.26964092e-01, 1.61195624e+00,
-6.62924838e-04, 9.19421136e-01, -1.33161962e+00,
-1.39440417e+00, 1.46560287e+00, 1.46038902e+00,
6.35830939e-01, -1.16640294e+00, -1.94508851e+00,
-1.40499246e+00, 4.41272616e-01, -4.16648149e-01,
7.80492183e-03, 3.37062478e-02, -1.07358836e-04,
-4.41409126e-02, -4.31603998e-01, 1.53543144e-01,
1.30759224e-01, -2.56475108e-03, 7.63776898e-01,
1.53784764e+00, 1.83141470e+00, -1.21040735e-04,
1.20823312e+00, -3.02580390e-02, -4.38995101e-02,
-6.82639301e-01, -1.55696833e+00],
[ 2.18755603e-01, 3.30512911e-01, 5.30732870e-01,
-6.91586826e-03, 8.88140559e-01, 1.38035202e+00,
4.22113955e-01, 3.27690899e-01, 5.80328465e-01,
-8.95680904e-01, 3.21526736e-01, 4.35855519e-03,
-3.05131935e-02, -7.67381370e-01, -6.68240368e-01,
1.04581594e+00, -7.57330835e-01, 1.01685964e-01,
3.97460639e-01, -2.22352877e-01, -9.30842459e-01,
-1.02335179e+00, -1.33730268e+00, -2.17042062e-02,
-4.03183699e-01, 8.96039903e-01, 5.22241890e-01,
2.08088726e-01, 1.10845661e+00, 3.14849257e-01,
6.44634008e-01, 8.22537467e-02, 1.39373326e+00,
-9.07135904e-01, 8.45335238e-03, 5.54452743e-03,
4.61579971e-02, -1.94758379e+00, -2.03927088e+00,
1.31577477e-01, 3.90882976e-02, 3.53519797e-01,
4.71046660e-03, 1.05296999e-01, -2.76306093e-01,
1.03025186e+00, -6.11744583e-01, -9.12510753e-01,
-1.20925140e+00, -9.06160057e-01, -8.15461483e-03,
-5.44668376e-01, 1.22065639e+00, 8.14486891e-02,
-1.25983584e+00, -1.82801557e+00, -7.66913733e-03,
3.30773056e-01, -4.06407297e-01, 8.46734166e-01,
7.46151686e-01, 2.75360723e-03, 1.47149101e-01,
7.79527545e-01, -1.43433404e+00, 3.06692324e-04,
5.39695518e-03, 1.44145739e+00, -9.27149534e-01,
-6.42194390e-01, 8.87651503e-01, -1.72311378e+00,
-6.59395278e-01, 7.75151014e-01, 6.25761449e-01,
-1.15683782e+00, -8.97214341e-06, 8.66350591e-01,
1.60445893e+00, 6.81327999e-01, -4.46330696e-01,
-2.12637639e+00, 6.57984972e-01, 3.53970863e-02,
-2.48872668e-01, -1.13855720e-01, 1.74160349e+00,
-7.04182565e-01, 1.78580284e-02, -4.59581345e-01,
3.89939658e-02, -5.21277249e-01, 1.48470432e-01,
-1.08598014e-02, 1.20246798e-01, 1.63158369e+00,
-8.95180762e-01, 5.77921331e-01, -2.26092517e-01,
-1.00619048e-02, -6.88645244e-01, 1.44328368e+00,
1.58837140e+00, -4.86813635e-01, 1.76302683e+00,
-7.94048667e-01, 2.56043643e-01, -1.01352572e+00,
-3.22397709e-01, 1.65047944e+00, 1.72389105e-01,
-2.73340344e-01, 5.57618082e-01, -7.54366675e-03,
-4.99539264e-02, -1.87105328e-01, -8.52472007e-01,
2.18969956e-01, -3.52831115e-03, -5.60944557e-01,
2.14604393e-01, -7.43220627e-01, -2.07326654e-03,
8.10442626e-01, -2.78063156e-02, -4.83574606e-02,
-4.54569966e-01, 2.35334411e-01]], dtype=float32))
(u'Model_encoding_1_128_b:0', array([ -3.53591800e-01, 2.89489119e-03, 1.68837440e+00,
1.72077003e-03, -7.33173549e-01, -5.56634545e-01,
1.10213649e+00, -1.47558010e+00, 2.26101056e-01,
5.54402918e-02, 8.32204640e-01, -1.25711295e-03,
-1.22733489e-02, -8.84554833e-02, 3.51882689e-02,
-7.11705685e-01, -8.17429006e-01, 3.99446964e-01,
-1.94438174e-01, -3.85803699e-01, -8.76153409e-01,
-1.48527056e-01, -2.29479745e-01, -2.16309920e-01,
5.88836730e-01, 3.10406834e-02, 2.72234641e-02,
-2.11090147e-01, 5.37247956e-01, 8.15055192e-01,
-2.39380628e-01, -5.67801952e-01, 7.35999197e-02,
6.08389318e-01, 2.68888776e-03, 3.96747747e-03,
5.48047900e-01, -8.21124673e-01, -8.15922678e-01,
7.88235128e-01, 3.04896504e-01, -6.46521002e-02,
1.45185774e-03, -5.81516549e-02, -5.28949261e-01,
-1.27816045e+00, 8.37535024e-01, 6.74281180e-01,
-1.43979156e+00, -2.53212988e-01, 1.47621066e-03,
4.93921749e-02, -8.83960724e-01, -2.79819936e-01,
1.62328482e-01, -5.67500830e-01, -1.10416277e-03,
2.94668227e-01, -2.54651040e-01, 2.24821496e+00,
-5.22488356e-01, 5.11963526e-03, 5.68230808e-01,
-1.33091407e-02, -9.09581363e-01, 4.66140091e-01,
2.12963019e-03, -6.78313375e-01, -9.07178342e-01,
-3.90967607e-01, 3.33592296e-01, 1.75393999e+00,
-8.12344253e-01, 1.94493875e-01, 7.83673704e-01,
-8.83258730e-02, 3.28287762e-03, -1.71068221e-01,
-5.56717813e-01, -2.17601046e-01, -2.61457637e-03,
-8.34800243e-01, 3.59039605e-02, 2.39728153e-01,
-6.97198063e-02, -6.28778398e-01, -1.92622364e-01,
-3.81820053e-01, 1.78062305e-01, 4.54133481e-01,
-9.43882048e-01, -4.56447572e-01, 1.23507909e-01,
2.91743316e-03, 6.87805772e-01, -1.19784939e+00,
1.31720257e+00, 1.46116510e-01, 2.84409285e-01,
-1.81065139e-03, -7.80790329e-01, 2.74967074e-01,
-2.68098742e-01, -4.40162301e-01, -1.43836474e+00,
1.31207955e+00, -7.52776325e-01, -5.04290998e-01,
-6.74772739e-01, -2.45706394e-01, -1.06818944e-01,
-2.87254214e-01, 6.79569066e-01, 7.38520059e-04,
-2.17231095e-01, 3.78478110e-01, -8.13220978e-01,
-1.05956313e-03, 2.63263378e-03, 7.22431183e-01,
5.34777343e-01, 1.64595008e-01, -1.06621545e-03,
2.89870352e-01, -3.64725590e-02, -2.78015068e-04,
6.17138147e-01, -3.97759110e-01], dtype=float32))
(u'Model_hidden_0_128_W:0', array([[ 0.15385148, -0.07086949, 0.25266925, ..., 0.02395882,
0.06417269, 0.00858688],
[-0.02928857, -0.10270411, -0.43890595, ..., 0.03837156,
0.09018433, 0.0084036 ],
[-0.07166839, 0.07556302, 0.29583478, ..., 0.20021494,
0.13975964, 0.13543563],
...,
[ 0.08009931, 0.17665736, 0.07073602, ..., -0.06999661,
-0.21005966, 0.12438426],
[-0.13400185, -0.0377967 , 0.14145063, ..., -0.06341632,
-0.25172043, -0.03817763],
[ 0.02931257, 0.05650125, -0.03694936, ..., 0.06736658,
0.00633232, -0.06542388]], dtype=float32))
(u'Model_hidden_0_128_b:0', array([ 0.34662816, 0.20968103, 0.55950922, 1.47191012, -1.06928694,
3.42770386, -0.90629488, -1.84855163, -1.40221548, -0.71487433,
-0.36338583, -0.06824226, 0.15237714, 0.10677415, -1.60381019,
1.01772857, -1.78690231, -0.36363432, -1.30383611, -0.77965927,
0.0482565 , 1.88045907, 0.44988295, -1.06382716, -1.18797958,
-1.26287508, -0.46893916, -1.40956521, -1.43453407, -0.32225755,
-0.64099878, -1.18947601, 1.94657505, -0.31499335, -0.28723156,
1.64383459, 0.48390037, 0.19727799, 1.35467517, -0.83697969,
0.93171638, 1.32649267, -0.0722913 , 0.60765207, -0.32225904,
1.25359464, -0.84715044, -0.17851688, -0.44902834, 0.63244396,
-0.94524354, 3.02708101, 1.22451186, -0.93472147, 1.49923933,
-0.65579903, -1.02713764, -0.4162637 , 0.46058685, 1.18859255,
-0.99767232, -0.53451353, 0.15852594, 1.01154113, 0.38369602,
1.41504669, -1.79347336, -0.8779819 , 0.18930613, 0.18131767,
-0.12480238, 0.02087775, 0.89547539, 1.56253433, -1.30195522,
-1.11277819, 1.57470405, -0.74574012, -0.2088989 , 1.03929019,
0.0611239 , -0.21517929, 0.01381626, -0.26891834, 1.16282988,
0.10874241, 1.6338855 , 2.14556623, -0.18659258, 2.44138575,
-0.16484246, 1.08448029, 0.71764338, 0.97498965, 0.06893414,
1.79038584, 2.05231857, -1.69427752, -0.35271874, 0.42474437,
-1.62477422, -0.90681064, 0.54836226, -0.0264253 , 0.01745015,
-0.44483638, -0.84535694, -1.57162213, 0.22365049, 0.36899814,
-0.83046824, 0.55420029, -1.17304063, -0.13687551, -0.92541409,
-0.68269527, -0.28548574, 0.14865412, 0.69655818, -0.26855153,
-1.27151239, 2.75779963, -0.68516302, 0.56907892, 0.38557944,
1.35997093, -0.72228938, 0.62975407], dtype=float32))
(u'Model_output_W:0', array([[ 7.31716072e-03, 3.22455005e-03, 5.77023299e-03, ...,
8.67558084e-03, 1.85865513e-03, -7.44301185e-04],
[ -5.00374995e-02, -4.45772568e-03, -4.32652049e-02, ...,
2.11954154e-02, -2.75489073e-02, -7.60850788e-04],
[ 2.68206676e-03, -4.60719597e-03, -1.21139584e-03, ...,
-1.32429064e-03, 2.73498031e-03, -3.34803015e-04],
...,
[ 1.41959842e-02, -2.25083884e-02, -2.81401910e-02, ...,
-9.18276701e-03, -1.60526088e-03, 1.43709860e-03],
[ 2.11021472e-02, 3.32941138e-03, 8.00302252e-04, ...,
-7.23547023e-03, -9.24836472e-03, -1.79610134e-03],
[ 1.89327286e-03, 3.69076105e-03, -2.32098773e-02, ...,
2.34930776e-03, -3.25146620e-03, 2.47965254e-05]], dtype=float32))
(u'Model_output_b:0', array([ 0.47061214, -1.19863927, -1.30521727, -0.45668426, -0.00295663,
-0.79146188, 0.25768077, -1.68036783, 0.86140007, 2.21447468,
-0.25510615], dtype=float32))
(u'Reward_encoding_0_128_W:0', array([[ 2.61366040e-01, -6.29676925e-03, -4.15560603e-01, ...,
1.88776776e-01, 1.22856855e-01, 2.88239479e-01],
[ -2.50544220e-01, -5.35018086e-01, -1.20561212e-01, ...,
2.91241705e-01, 4.02280502e-02, -2.61186540e-01],
[ -1.70766324e-01, -3.86805296e-01, -4.86867987e-02, ...,
3.16637188e-01, 1.02402516e-01, 3.65410239e-01],
...,
[ 2.51113027e-01, 6.74695671e-01, 1.77191424e+00, ...,
-9.28966030e-02, -4.24026512e-02, -3.09371024e-01],
[ -2.96665281e-01, -6.04471266e-01, -1.76018059e+00, ...,
4.24802035e-01, 2.10374027e-01, -6.33094285e-04],
[ -3.86190206e-01, 4.22850460e-01, 3.04035485e-01, ...,
-8.88429210e-02, -1.81857854e-01, -5.75896688e-02]], dtype=float32))
(u'Reward_encoding_0_128_b:0', array([-0.72233588, -0.67463207, -0.76185793, 1.60749233, 0.19636594,
-1.66816843, -1.41769457, 1.13988972, -0.46429867, -0.37453169,
-0.27976924, -0.15282579, 0.54513949, -0.99970376, 0.0128898 ,
0.0811407 , -0.58778077, -0.19291514, 0.07119756, -0.11825548,
0.98341054, -0.07207923, 0.60793835, 0.07947539, 0.07134511,
1.1362021 , -0.11745257, 0.22574972, -0.11818907, 0.02789942,
-0.70776767, -0.6908282 , 0.57300901, -1.78711998, 0.03491477,
0.47830603, -0.32966605, -0.14419425, -0.76442146, 0.11483639,
0.21463737, -1.09267104, -1.14199436, -2.03781366, 0.2659395 ,
-1.91249502, -0.57364541, -0.06032855, -0.33714706, 0.23715855,
0.21887368, -0.51903641, -0.75301635, -0.00614016, 0.16983971,
-0.27232638, -0.29817212, 0.47639546, 0.15412121, -0.02748007,
-0.37497711, -2.03886819, 0.55259627, -0.67853767, -0.13616924,
-0.27418536, -0.87620914, -0.72497851, -0.19752857, -0.02554152,
1.68159759, -0.04134951, 0.03000114, 0.9035399 , 0.02284332,
-0.82778984, -0.88626653, -0.7861048 , 0.61093831, -0.50939161,
-0.03233551, -0.12329398, -0.22133406, 0.03372964, -0.08827191,
0.17296986, -0.31674215, 0.30938724, 0.02868644, -0.03859705,
-2.6079762 , -0.83212441, 0.28583345, 0.84811187, -0.37040991,
-0.67755157, -0.60867691, -1.02169693, -1.18157244, -1.54113507,
0.15197802, -0.80331206, 0.49116072, -0.65869743, 0.08616125,
-0.49278826, -0.22192466, 0.52288872, -0.02236394, -2.52278852,
-0.99282217, -0.13468161, -0.17751184, -1.47551858, 0.24662291,
0.03438669, -0.37574536, -0.47989789, 0.07613304, -0.47252429,
-0.65685064, 0.02370675, -1.16694129, -0.63383716, -0.17888001,
0.74431753, -0.10684121, -0.51209897], dtype=float32))
(u'Reward_encoding_1_128_W:0', array([[ -4.67955470e-01, 3.97685975e-01, -1.14535011e-01,
4.80270803e-01, 9.36006665e-01, 8.27705920e-01,
-7.77403116e-01, 1.15522254e+00, 1.04362822e+00,
6.26053885e-02, 2.23278701e-01, -6.78583384e-02,
1.18263757e+00, 9.14154410e-01, 7.63982117e-01,
-6.46524578e-02, 8.46817851e-01, 5.17840207e-01,
7.19007671e-01, 4.32647258e-01, -2.53118962e-01,
1.10628593e+00, -4.55191135e-01, 1.87565535e-01,
6.24454916e-01, -1.16788991e-01, -1.85113207e-01,
8.75886381e-02, -1.17268765e+00, 4.47623700e-01,
-3.79354298e-01, -5.98003604e-02, 4.87006307e-01,
-1.08870411e+00, 3.44313771e-01, -2.89594263e-01,
1.17269886e+00, 7.79140592e-01, 1.53546560e+00,
-1.13302505e+00, 1.06320508e-01, -1.03854084e+00,
-3.11234802e-01, -9.14140165e-01, 1.51682138e+00,
2.81883448e-01, -4.14100379e-01, 1.35004878e+00,
7.52346888e-02, 7.76838064e-01, -8.36370170e-01,
-8.34434211e-01, 2.62856036e-01, -1.28879464e+00,
-5.00524223e-01, -4.49258715e-01, -2.44386256e-01,
4.75429386e-01, 3.67583483e-01, -2.27135420e-01,
1.10102631e-01, -1.01102471e+00, 5.76247990e-01,
-1.55534613e+00, -7.45260358e-01, -8.83341253e-01,
-5.91893315e-01, 3.01122695e-01, -1.09177756e+00,
-9.49444324e-02, 4.15702432e-01, -5.12760997e-01,
-3.83265495e-01, -1.21576047e+00, -7.71101952e-01,
1.08327270e+00, 8.78494918e-01, -5.68711758e-01,
-1.30721675e-02, 2.91722000e-01, 3.65262717e-01,
-1.99214414e-01, -6.77439749e-01, -1.16730601e-01,
-1.57524240e+00, -1.07415271e+00, -9.73088592e-02,
-8.10116470e-01, -5.08994937e-01, 9.09641743e-01,
-1.18167274e-01, -4.58449423e-01, -3.95184606e-01,
2.19900131e+00, -4.37768459e-01, 5.15760720e-01,
-2.43488088e-01, 5.06519914e-01, -1.89347833e-01,
-1.65828004e-01, 3.53198975e-01, -1.32702172e+00,
-9.81226563e-02, -1.62138414e+00, 1.84038043e-01,
-1.17089108e-01, 1.75252807e+00, 7.51397252e-01,
-8.41947377e-01, 1.30509949e+00, 7.33456552e-01,
-1.16113615e+00, 1.29160237e+00, -9.64153588e-01,
6.99059069e-02, -5.29629469e-01, -4.41253632e-01,
-1.90800416e+00, -2.70717009e-03, -4.51874167e-01,
6.36484265e-01, -1.15086675e+00, -1.66217983e+00,
7.68325627e-01, -8.54029119e-01, 7.51556903e-02,
-5.03474653e-01, -4.62738462e-02],
[ -1.64051151e+00, -2.24205628e-01, -1.13038540e-01,
-1.52019620e-01, 5.23659885e-01, -2.09299415e-01,
7.90269256e-01, -6.56585395e-01, -8.49585056e-01,
8.41238618e-01, 1.37598836e+00, 1.42281353e+00,
2.64803886e-01, -1.36844897e+00, -4.32870239e-01,
7.52822518e-01, 4.26656246e-01, -1.55312240e+00,
9.00874138e-01, -8.79804075e-01, 1.27272999e+00,
1.13446867e+00, 1.30798265e-01, -4.32711793e-03,
-4.71698433e-01, 7.76445329e-01, -1.48502123e+00,
1.47595882e+00, -2.94392526e-01, 8.25720932e-03,
-5.09965360e-01, 1.33653045e+00, 6.17642581e-01,
-2.18953705e+00, 1.17094481e+00, -1.93318026e-03,
9.39620376e-01, 4.58399773e-01, 1.12364256e+00,
-1.54889512e+00, 2.02920511e-01, 5.38131058e-01,
-1.65319455e+00, 2.92809695e-01, -7.91514277e-01,
-1.12361675e-02, -4.02303301e-02, 4.01537605e-02,
6.44812763e-01, 7.99451351e-01, 3.08061957e-01,
6.67215466e-01, -5.97733818e-02, -1.41051626e+00,
2.01287270e+00, 1.58372819e+00, -7.41858661e-01,
1.28831470e+00, -3.08413416e-01, -1.56598657e-01,
-8.06153715e-01, 7.70687938e-01, 1.91279292e-01,
2.73167938e-01, 1.82539336e-02, 1.60753965e-01,
-1.04769540e+00, -1.39190388e+00, -1.30631256e+00,
-3.61795664e-01, -1.93312243e-01, 9.28959012e-01,
1.47495961e+00, -2.50058949e-01, -8.49048018e-01,
-2.65582919e-01, 1.49155688e-02, -6.41968131e-01,
2.91377544e-01, -1.41718924e+00, 3.57672125e-02,
1.27210736e+00, -2.89588153e-01, -1.35708630e+00,
-5.63376307e-01, -1.24969625e+00, -1.41238475e+00,
1.11535862e-01, -8.87676835e-01, 1.25020003e+00,
7.75742829e-01, 4.44264352e-01, -3.88066657e-02,
-8.49407673e-01, 8.39650035e-01, 9.89397645e-01,
9.01096821e-01, 5.89563847e-01, 7.10922539e-01,
1.41391945e+00, -4.54510212e-01, 6.17870152e-01,
5.12476981e-01, 1.79589823e-01, -5.96406639e-01,
-6.88155353e-01, -1.13795602e+00, 1.83502257e-01,
3.86001766e-01, -1.95019871e-01, -6.32448554e-01,
-1.81954861e-01, 6.89920187e-01, -2.58172691e-01,
7.52977788e-01, -1.82183936e-01, 5.87805510e-01,
-1.50027728e+00, -3.48273456e-01, 3.58070910e-01,
1.78666934e-01, 1.82635393e-02, -9.70044971e-01,
3.51140857e-01, -2.64160991e-01, -1.08847654e+00,
1.27456033e+00, 2.67587900e-01]], dtype=float32))
(u'Reward_encoding_1_128_b:0', array([ 0.09688324, 0.60994142, 1.94803464, 0.48832586, -0.9540062 ,
-0.69233298, -0.31427804, 0.82863104, -0.74675661, -0.08384232,
-1.102584 , -0.27151582, 1.62341964, -0.50068456, 0.27396902,
1.43248045, 0.3695696 , -1.2079134 , 0.56145322, -1.67501235,
-1.27129555, 0.79835945, 0.016769 , 0.57402503, 0.63815939,
0.68658549, -0.36982718, -0.95336497, 0.5767653 , -0.32658708,
0.6193735 , 0.56034672, 1.55121422, -0.95909005, -0.05628935,
-0.19514266, 0.27274212, -0.53359133, -0.10745384, -1.41716194,
-0.55175751, -0.30920571, -1.22955966, -0.85158288, 0.89858359,
-1.07826781, 0.84492481, -0.66922152, 0.55170244, 0.0949516 ,
0.43789101, -0.71628261, -0.01852418, 0.37300426, -1.42019022,
-0.41777194, 1.54889417, -0.62795293, -0.15210551, 0.09196834,
0.12653649, 0.40592125, 0.01263528, -0.31741041, 1.92752957,
0.60750508, 0.09252909, -0.38651255, -0.61064231, -2.08758163,
-0.58437401, -0.96612549, -0.3120015 , -2.00327301, 0.53327483,
-0.53346753, 1.41357279, 0.60273159, -0.01473782, 0.99621481,
-0.19890012, 0.17933816, 0.78794503, -0.32325917, 2.16707969,
-1.52839541, -1.57080448, -0.11768883, -0.22508362, 0.72050411,
0.72110355, 0.79039562, 0.59132606, -0.1370811 , 0.66876167,
0.45128644, -0.8499893 , 0.54716516, -0.73824459, -1.26398289,
-0.26566228, -1.09981751, -0.31759539, -0.85698116, -1.19017923,
0.46552086, -1.67348099, -0.15167452, 0.95777988, -0.61502564,
-0.09650173, 0.18450709, 0.08087637, -0.99210221, 0.6680162 ,
0.28525469, 0.91398233, -0.13092671, 0.84788328, -0.70205092,
0.66242087, -0.52369511, -2.68851852, 0.39604178, -1.02564108,
-1.60364223, -0.86979604, -1.2312032 ], dtype=float32))
(u'Reward_hidden_0_128_W:0', array([[ 0.10958645, 0.09395231, -0.00755225, ..., -0.0507121 ,
0.16303818, 0.02777584],
[ 0.12196246, -0.06756705, 0.02336294, ..., -0.0724019 ,
-0.07290371, 0.02857098],
[-0.05380745, 0.05031267, -0.13770364, ..., -0.10284197,
-0.02085213, 0.08140637],
...,
[ 0.08387269, 0.35501432, 0.11285098, ..., 0.19836064,
0.13795523, 0.05003012],
[ 0.0666175 , 0.13649631, 0.25468102, ..., -0.10407271,
0.04947896, 0.30685818],
[ 0.09381811, 0.07854906, 0.02567767, ..., 0.31008086,
0.08970757, -0.10440981]], dtype=float32))
(u'Reward_hidden_0_128_b:0', array([ 0.31849095, 1.65584016, -1.9761765 , -0.27554646, 0.77482647,
-0.07802153, 1.47769785, -1.03299379, -0.13675053, 0.91928726,
0.35569376, 0.44142637, -0.34278303, -2.5468924 , -2.04564881,
1.23282301, 0.58311677, -1.05032074, 1.27139544, -1.87821531,
-0.46453881, -0.04010871, -0.52192909, -1.24669409, -0.19001007,
2.57051516, -0.59765518, -1.19809651, 0.2356036 , -0.87956572,
-0.91665483, -0.41843697, -0.53915858, 0.73523158, -2.27436733,
-0.04816004, 0.28993016, -0.41041058, -0.13482001, 0.40752959,
-1.50960147, -1.46167767, -0.002826 , 0.19009069, -2.35962939,
0.39835244, -1.69973731, 0.4604899 , 0.74178618, 1.94377339,
-1.05293369, -2.16296172, -0.56618845, 1.57766402, 2.39953375,
-1.70902562, -0.55895746, 0.95113677, -0.42529988, -0.60647202,
-0.32296064, 1.4479841 , 0.57611191, 0.52308631, -0.49062949,
-0.12156148, 0.66866684, 0.65552664, -0.35484347, -0.29394448,
1.13458729, -1.7355485 , -0.62908727, 1.33259594, 0.56410593,
-0.42665923, -0.39788625, -0.90291399, 0.37335518, -0.30849686,
-1.01172352, 0.41893634, -0.5362457 , -0.61342859, 0.66131639,
0.49315941, 0.28410327, -0.85489666, 0.38995394, -0.56975156,
-0.02545753, 0.28034109, 0.26142138, -1.76030564, 1.35060072,
-1.23855162, -0.74403739, 0.2162879 , -0.9630003 , 0.75629848,
-0.46938166, 0.84211957, 0.31830314, 0.67192864, 1.73537838,
1.07791543, -0.63848263, 0.87754911, 0.04609891, 0.56716698,
-1.73614442, 0.91953105, -0.52436548, 0.21304777, -0.52170372,
0.16445412, 0.28852525, -0.73122156, -0.20092122, 0.40548271,
0.6465705 , 1.3146323 , -0.71616226, 0.76377434, 1.10345125,
-1.46179831, -1.77694261, 0.79263777], dtype=float32))
(u'Reward_output_W:0', array([[ -1.72245689e-02],
[ -5.40777110e-03],
[ -2.02246476e-02],
[ -1.96598098e-02],
[ 7.61108175e-02],
[ 1.54440671e-01],
[ -1.99868828e-02],
[ -2.25408673e-02],
[ 1.38443150e-02],
[ -4.44268957e-02],
[ -9.30994097e-03],
[ 1.28578686e-03],
[ -2.58747917e-02],
[ 5.40112443e-02],
[ 4.77358364e-02],
[ -6.92831236e-04],
[ -2.07916051e-02],
[ 8.84162188e-02],
[ -2.42201117e-04],
[ -8.39330181e-02],
[ -1.18675679e-02],
[ 2.88240630e-02],
[ 1.21293578e-03],
[ 5.63037931e-04],
[ -3.64847779e-02],
[ 8.52836296e-04],
[ 8.98375147e-05],
[ 7.35253841e-02],
[ -1.10123660e-02],
[ 1.12899430e-01],
[ -1.85649954e-02],
[ 1.38908967e-01],
[ -3.91598296e-04],
[ 1.86188030e-03],
[ -2.84390412e-02],
[ 5.00518829e-04],
[ 1.57851838e-02],
[ -2.00999323e-02],
[ 6.04934134e-02],
[ 1.72049477e-04],
[ 1.32909462e-01],
[ -5.36111998e-04],
[ 3.86757776e-02],
[ -2.35444363e-02],
[ 1.29627921e-02],
[ 7.87441037e-04],
[ 1.07414193e-01],
[ -9.33090970e-03],
[ -6.63202663e-05],
[ -1.79252476e-02],
[ 3.44425924e-02],
[ 7.12587088e-02],
[ -8.07353016e-03],
[ 1.28498534e-03],
[ -1.46755512e-04],
[ 1.30814230e-02],
[ 7.52521083e-02],
[ 5.39731875e-04],
[ -1.05085710e-04],
[ 1.79688744e-02],
[ -9.02518281e-04],
[ -1.38304234e-02],
[ -1.75810307e-02],
[ -2.58444306e-02],
[ -1.35383103e-03],
[ -4.96846566e-04],
[ -1.32483095e-02],
[ 1.18371211e-02],
[ -3.99680398e-02],
[ 9.04524131e-05],
[ -1.56366657e-02],
[ 6.10387027e-02],
[ 8.24935585e-02],
[ 4.74022410e-04],
[ -1.30513171e-02],
[ 3.23380716e-02],
[ -1.48249138e-02],
[ -2.10611410e-02],
[ -1.30558060e-03],
[ 6.23196363e-04],
[ -1.73743698e-04],
[ -3.13807763e-02],
[ -6.61696494e-03],
[ -1.09379813e-01],
[ -9.47421140e-05],
[ -2.15944722e-02],
[ 4.45457881e-05],
[ -8.35269529e-05],
[ -1.75928455e-02],
[ 9.69549045e-02],
[ -3.44349854e-02],
[ 1.01659633e-01],
[ 3.29783671e-02],
[ -1.19240284e-02],
[ -1.68906222e-03],
[ 1.24960423e-01],
[ -3.18570121e-04],
[ 3.68983997e-03],
[ 4.12989631e-02],
[ 9.45681985e-03],
[ -1.61004136e-04],
[ 4.14938591e-02],
[ -6.68773660e-04],
[ 1.13565606e-04],
[ -3.94434668e-02],
[ 7.82577321e-03],
[ 1.77549832e-02],
[ 5.04783355e-04],
[ -1.42363701e-02],
[ -2.57702731e-02],
[ -1.10552669e-01],
[ 1.21373648e-03],
[ 3.49709652e-02],
[ 9.71700177e-02],
[ -4.06009203e-04],
[ 3.31087187e-02],
[ 6.28633541e-04],
[ 9.34682414e-03],
[ 2.16186289e-02],
[ 1.24026127e-02],
[ 3.50790942e-04],
[ -1.97513169e-03],
[ -2.00540572e-02],
[ -3.90912928e-02],
[ 4.09931317e-02],
[ 1.80372193e-01],
[ -2.57563218e-02],
[ -3.52936797e-02]], dtype=float32))
(u'Reward_output_b:0', array([ 1.16720629], dtype=float32))
(u'Value_hidden_0_256_W:0', array([[-0.13785458, -1.17496097, -2.2069838 , ..., 1.64754224,
0.19581114, 0.76055712],
[ 0.04965213, -0.20034274, -0.44636357, ..., 0.82017136,
-0.71539456, -0.21454801],
[-0.81460333, 0.49478075, -0.55952108, ..., -0.02308499,
0.03912404, -0.83535153],
...,
[ 0.61298829, -0.1682827 , -0.57667935, ..., 1.75981581,
0.33030128, -0.08921014],
[-0.94252425, -0.27522105, 0.10011103, ..., 0.70506632,
-0.13518064, -0.79008645],
[ 0.27773952, 0.44907433, -0.1070155 , ..., -0.2441431 ,
0.1781386 , 0.42544165]], dtype=float32))
(u'Value_hidden_0_256_b:0', array([ 1.78578782, -1.13289773, -1.13282025, -0.42298612, 1.28810048,
0.94630432, -0.77580124, 0.44361007, 0.04997174, -0.2949701 ,
-2.48366261, 1.31528795, -0.06285202, -0.00468678, -2.506387 ,
-0.55885977, 1.06362987, -1.204193 , -0.48487297, 1.55458486,
-1.52615082, -2.68877554, -1.67448926, -0.65940481, -0.62271738,
0.01855731, -0.7258929 , 0.05951614, -0.53181982, 0.20086668,
-0.89433753, 0.6736083 , 0.69640863, -1.21845758, 0.16907932,
0.15665664, 0.11855257, 0.32985669, -0.15142472, 0.31189504,
0.09144444, -0.03490094, -0.25444621, 0.50935078, 0.09275411,
-2.05146599, 0.58101684, -0.29670984, -0.67371017, -1.73532832,
0.12154857, 0.51087117, -1.13208663, -1.12271321, -1.86363411,
0.10401266, 0.15044968, -0.72424656, -0.36257446, -0.68419862,
-0.57464999, -1.15064549, -1.93514812, -0.56139702, -0.85649639,
0.64367473, 0.9909175 , 0.56421548, -0.1430794 , -0.45124927,
-1.1064086 , -0.36086079, -0.72110403, 0.84174943, -0.98650181,
-1.94149423, -0.04776868, 0.38393402, -0.88950181, 1.04378474,
-0.14869215, -1.43632495, 0.32632226, -0.1431836 , -1.57151592,
0.05479164, 0.10934588, -0.00957915, 0.5662905 , -0.34834835,
-1.03081405, 0.1609817 , -0.0333399 , 0.54326504, -0.86303467,
-0.87087929, -1.53543532, -0.48691854, 0.29493874, -0.85416204,
-1.44297171, -1.29493082, 2.12958145, -0.66743213, -0.76062232,
1.62518299, 0.23937477, -0.12400354, 0.83518219, -0.36398768,
0.49152267, 1.45936668, -1.41894484, 0.00934435, -0.99215376,
1.34403741, 0.4798609 , -1.09964502, 0.30879682, -0.81276256,
0.55388832, 0.94047409, 0.82732821, 0.0203611 , 0.43211782,
0.06336299, -0.27788419, -0.43409753, 0.39875785, -0.53929204,
1.56555068, -0.42607853, -0.49289304, 0.69521672, -0.24139374,
-1.19842076, 0.79897332, -1.04562223, -1.19013703, 0.23586881,
-0.27325734, 1.55505276, -0.0351376 , -0.16689534, 1.03409111,
-0.47078049, 0.84009045, 0.18810789, 0.10372163, 0.46226692,
0.12937343, 0.41876575, 0.53730637, 0.10328166, -2.69610214,
1.00925136, 0.92645198, 0.65578389, -1.82254362, -2.16645575,
-2.8766005 , 1.47650242, 0.14861402, 2.8256793 , 0.27014014,
1.4265759 , 0.35382524, -1.98596466, 0.35601348, -0.14413135,
-1.58368075, 0.50708318, 0.80059391, -0.98603845, 1.52372468,
0.38909319, 0.43698898, -0.02397862, 1.27757657, -2.06522536,
0.29193145, 0.63171977, -0.51625419, 0.15927073, -0.54645866,
-0.36382389, -1.57668185, 0.45936915, 0.52007967, -1.04008794,
-0.14481217, -0.14022091, -0.85114437, 0.50103289, 0.12177194,
-0.58680654, 0.06981559, 0.40621197, -1.26794469, -1.13487828,
-0.98860091, -1.55093145, -0.44211796, 1.00627089, -1.25597429,
-0.08978783, -0.73232067, -1.16819251, -1.12819743, -0.27425802,
-0.05575072, 0.23982911, 0.95686126, -1.30108583, 0.67817378,
-1.01362538, 0.37710005, 0.23351078, 1.41106546, 0.85772985,
0.03577228, -1.19685698, -0.56741351, 0.02173218, 0.23132394,
0.40094239, 0.06039136, -0.30147472, -0.90069318, -0.30751887,
-0.71390808, -0.27258736, 0.08877264, 0.28791946, -0.05585598,
-0.28352341, -1.24397123, -0.83353174, 0.23057126, -0.96560329,
-2.67353868, -0.54482394, -1.21689034, -0.31905472, 0.31759185,
0.33549452, 1.54719627, -0.45475188, 0.43433225, -2.50993133,
0.10569964, 0.1059221 , 0.68720323, -0.75011039, -0.08602009,
-1.22999346], dtype=float32))
(u'Value_hidden_1_128_W:0', array([[-0.14418589, -0.42850929, -0.21464546, ..., -0.06631085,
-0.29540265, -0.0019975 ],
[ 0.2855272 , 0.20527977, -0.06880599, ..., -0.17197354,
0.10095111, -0.03926265],
[-0.01212484, -0.07456007, 0.11160903, ..., -0.03752471,
-0.19881198, 0.04098099],
...,
[-0.16665548, -0.1974899 , -0.08213197, ..., -0.07938825,
0.02423345, 0.05182662],
[ 0.31556419, 0.17930953, -0.29400533, ..., -0.06170369,
-0.01636611, 0.11487763],
[-0.09179981, -0.23626348, 0.22912125, ..., -0.09495796,
-0.0976388 , -0.11386864]], dtype=float32))
(u'Value_hidden_1_128_b:0', array([ -1.01658320e+00, 5.87303698e-01, 1.78663242e+00,
9.89543080e-01, -1.04195499e+00, -4.22925800e-01,
-1.71024725e-01, -1.12503541e+00, -2.41476715e-01,
1.07188404e+00, -1.57314610e+00, 9.40307558e-01,
7.82473683e-01, 1.99706957e-01, 6.17127538e-01,
5.94909370e-01, -1.09823585e+00, -1.90611064e+00,
-5.74841380e-01, 3.54786575e-01, -2.85891742e-01,
1.56158197e+00, 5.24273992e-01, -6.35369003e-01,
-3.52004170e-01, -7.98953921e-02, 4.15515691e-01,
-1.78010786e+00, -1.18191898e+00, -1.31441072e-01,
8.32471073e-01, -6.46002352e-01, 7.07944930e-02,
7.39246488e-01, -5.60507953e-01, -1.67373765e+00,
-1.27198994e+00, 2.67984748e-01, -1.33741748e+00,
-4.75583166e-01, 2.60542721e-01, -2.30222130e+00,
1.39873743e+00, 1.71021223e+00, 5.08217573e-01,
1.88018811e+00, -5.94736971e-02, 5.90522110e-01,
2.34033656e+00, -8.69807720e-01, -5.70210278e-01,
-1.83161169e-01, -2.15145636e+00, 1.68457878e+00,
-7.46824384e-01, 4.37178910e-01, -1.23150635e+00,
5.09741083e-02, -1.76321054e+00, -1.30499828e+00,
4.17892385e-04, 1.30033061e-01, 8.54811072e-02,
1.13369799e+00, 1.31753170e+00, -3.74165103e-02,
-9.80801582e-01, -3.04855444e-02, -5.18771768e-01,
1.55449855e+00, -7.25880032e-03, 6.44329116e-02,
3.24879974e-01, -7.47536719e-01, 6.01090230e-02,
-8.51946712e-01, -1.72960863e-01, 7.33298481e-01,
-1.93910956e-01, 4.82474566e-01, 3.53861362e-01,
-8.49573135e-01, -6.64054215e-01, 1.84136018e-01,
-6.72681928e-01, -1.10626435e+00, 3.26317221e-01,
-1.22733390e+00, -1.81250498e-02, -1.58258641e+00,
3.28101933e-01, 1.61578810e+00, 1.39800286e+00,
-1.83842158e+00, 1.16880918e+00, 3.90607379e-02,
-1.28706062e+00, -4.68913674e-01, 2.03105974e+00,
-7.27466643e-01, -1.08109975e+00, 6.83079481e-01,
-1.90836036e+00, 5.45537293e-01, 1.61332941e+00,
-3.14935088e-01, -1.08771574e+00, 3.02766681e-01,
-5.14594652e-02, -1.66216195e+00, 4.52001810e-01,
4.84908909e-01, 3.31739932e-01, 4.81123626e-01,
-2.54795372e-01, -8.96972716e-01, -3.50317925e-01,
-1.75301746e-01, -2.77954483e+00, 6.58656299e-01,
8.21641505e-01, 3.21602553e-01, -1.47721136e+00,
-6.66404665e-01, 1.41406059e+00, 1.26434696e+00,
3.58377472e-02, -6.55694455e-02], dtype=float32))
(u'Value_output_W:0', array([[ 1.26290089e-02],
[ 1.02296157e-03],
[ 2.93469727e-02],
[ 2.55748222e-04],
[ 2.55505331e-02],
[ -1.12278806e-02],
[ 1.20092863e-02],
[ 2.10704848e-01],
[ -3.32192355e-03],
[ -2.83797760e-03],
[ -2.69860458e-02],
[ -3.75492871e-02],
[ 3.95826821e-04],
[ 9.11176391e-03],
[ 2.12944895e-02],
[ -4.05910134e-04],
[ 2.46981494e-02],
[ 1.77628547e-02],
[ 1.23066520e-02],
[ 8.31457414e-03],
[ -3.40282358e-02],
[ -4.59731482e-02],
[ 3.49002104e-04],
[ 8.33359547e-03],
[ -4.70384868e-04],
[ 9.14582796e-03],
[ -1.64422132e-02],
[ 2.07572593e-04],
[ -3.29212256e-04],
[ 6.17123358e-02],
[ 1.47433151e-02],
[ -3.71799394e-02],
[ -1.50996521e-02],
[ -8.74559674e-03],
[ -6.91055357e-02],
[ 4.15263437e-02],
[ -1.56191532e-02],
[ 4.19244543e-03],
[ 5.79394512e-02],
[ -1.48357332e-01],
[ -1.01783397e-02],
[ -2.96216283e-04],
[ -2.85039376e-02],
[ 5.73970191e-03],
[ -1.46215828e-02],
[ 2.20351815e-02],
[ -2.56867446e-02],
[ -2.07626093e-02],
[ 1.30289979e-03],
[ -2.85687838e-02],
[ -2.99945772e-02],
[ -4.71197302e-04],
[ -2.95408932e-03],
[ 6.35127490e-03],
[ 5.27186552e-04],
[ 9.22226254e-03],
[ -2.79514156e-02],
[ 1.43927895e-02],
[ -6.36742264e-02],
[ 7.87790306e-03],
[ 6.06658682e-03],
[ 6.11642823e-02],
[ -4.52078059e-02],
[ -3.21502946e-02],
[ -2.11422388e-02],
[ -1.73612013e-02],
[ -2.28573233e-02],
[ -1.79550126e-02],
[ -3.65101956e-02],
[ 5.63557260e-04],
[ -3.52974534e-02],
[ -1.04180137e-02],
[ -9.65785235e-03],
[ 8.93031713e-03],
[ 8.28750059e-03],
[ -1.66633204e-02],
[ 1.83509625e-02],
[ 5.01403399e-03],
[ -6.73705488e-02],
[ 5.16280383e-02],
[ 8.23243707e-03],
[ -4.96383011e-03],
[ 9.42009268e-04],
[ 8.22461490e-03],
[ 3.77492537e-03],
[ -9.98212248e-02],
[ -9.98805463e-03],
[ -4.67201583e-02],
[ 8.60015675e-02],
[ 8.99988413e-03],
[ -2.45901253e-02],
[ -1.05488701e-02],
[ -8.93954411e-02],
[ 2.48826202e-02],
[ -3.28371599e-02],
[ 2.15401947e-02],
[ 9.72577929e-02],
[ -1.19363274e-02],
[ -1.47199463e-02],
[ 1.91697896e-01],
[ -7.46116461e-03],
[ 5.59695959e-02],
[ 1.44438958e-03],
[ 2.37609781e-02],
[ 8.35738319e-06],
[ -9.31871310e-03],
[ -2.20832769e-02],
[ 1.07680289e-02],
[ 6.19172724e-03],
[ 3.06163784e-02],
[ 2.92637739e-02],
[ 6.60690386e-03],
[ 3.10995325e-04],
[ 3.41241423e-04],
[ -3.32077853e-02],
[ -3.87130193e-02],
[ -1.84710752e-02],
[ -2.66821263e-03],
[ -4.97201756e-02],
[ 2.17761490e-02],
[ 3.71829942e-02],
[ 3.12109548e-03],
[ -1.09653464e-02],
[ -8.46509337e-02],
[ 1.03776995e-02],
[ 8.87018803e-04],
[ -5.89873120e-02],
[ 3.23366141e-03]], dtype=float32))
(u'Value_output_b:0', array([-1.97871304], dtype=float32))
In [ ]:
Content source: karolkuna/reinforcement-learning
Similar notebooks: