In [1]:
%matplotlib inline
In [2]:
import gym
import tensorflow as tf
import numpy as np
import math
from tqdm import tqdm, trange
import random
import time
In [3]:
from experiencereplay import ExperienceReplay, PrioritizedExperienceReplay, ModelBasedPrioritizedExperienceReplay
from experiment import Experiment
from dmlac import DMLAC
import nn
from exploration import EpsilonGreedyStrategy, OUStrategy
from ounoise import OUNoise
In [4]:
# Hyperparameters and run configuration for the DMLAC experiment.
settings = dict(
    environment = 'MountainCarContinuous-v0',
    timesteps = 8000,                        # total environment steps for the whole run
    max_replay_buffer_size = 100000,
    batch_size = 64,
    learning_start = 256,                    # steps of pure noise exploration before training starts (see training loop)
    forward_steps = 1,                       # passed to DMLAC as model rollout depth — semantics defined in dmlac.py
    discount_factor = 0.99,
    trace_decay = 0.9,                       # presumably the eligibility-trace lambda; confirm in dmlac.py
    actor_learning_rate=0.0001,
    model_learning_rate=0.001,
    reward_learning_rate=0.001,
    value_learning_rate=0.001,
    actor_l2=None,                           # None presumably disables L2 regularization for that network
    model_l2=None,
    reward_l2=None,
    value_l2=None,
    actor_target_approach_rate=0.99,         # soft target-network update rates — TODO confirm direction in dmlac.py
    value_target_approach_rate=0.99,
    train_updates_per_step = 10,             # gradient updates per environment step
    priority_updates_per_step = 100,         # oldest-priority refreshes per environment step
    actor_net_layers = [256, 128],           # hidden-layer sizes
    actor_net_activation_fn = tf.nn.elu,
    actor_bounded_output = True,             # when True the actor output is bounded to env.action_space (see cell 9)
    value_net_layers = [256, 128],
    value_net_activation_fn = tf.nn.elu,
    model_net_embedding = 128,               # size of the joint (state, action) input embedding
    model_net_layers = [128],
    model_net_activation_fn = tf.nn.elu,
    reward_net_embedding = 128,
    reward_net_layers = [128],
    reward_net_activation_fn = tf.nn.elu,
    environment_seed = 0,
    noise_seed= 0,
    gpu_memory_fraction = 0.1,               # cap per-process GPU memory so several runs can share a card
    render_environment = True,
    render_frequency = 10,                   # presumably "render every Nth episode" — confirm in experiment.py
    render_start=7000,                       # presumably the timestep after which rendering begins — confirm in experiment.py
)
# Derived settings: a timestamped experiment directory and one Adam optimizer per network.
settings["experiment_path"] = "experiments/experiment_dmlac_{}_{}".format(settings["environment"], int(time.time()))
settings["actor_tf_optimizer"] = tf.train.AdamOptimizer(settings["actor_learning_rate"])
settings["model_tf_optimizer"] = tf.train.AdamOptimizer(settings["model_learning_rate"])
settings["reward_tf_optimizer"] = tf.train.AdamOptimizer(settings["reward_learning_rate"])
settings["value_tf_optimizer"] = tf.train.AdamOptimizer(settings["value_learning_rate"])
print(settings["experiment_path"])
experiments/experiment_dmlac_MountainCarContinuous-v0_1495409405
In [5]:
def preprocess_state(observation):
    """Convert a raw gym observation into the agent's state vector.

    A copy of the observation is made, so the caller's array is never
    mutated.  For MountainCarContinuous-v0 the velocity component
    (index 1) is scaled by 10 so that both state dimensions span a
    comparable magnitude; all other environments pass through unchanged.
    """
    state = np.array(observation)
    if settings["environment"] == "MountainCarContinuous-v0":
        state[1] *= 10
    return state
def preprocess_reward(reward):
    """Identity transform; kept as a hook for future reward shaping."""
    return reward
In [6]:
# Build and seed the environment, then take the preprocessed initial observation
# as the agent's starting state.
env = gym.make(settings["environment"])
env.seed(settings["environment_seed"])
observation = preprocess_state(env.reset())
state = observation
[2017-05-22 01:30:05,805] Making new env: MountainCarContinuous-v0
In [7]:
# Dimensions of the flattened observation and action vectors
# (MountainCarContinuous-v0: state_dim == 2, action_dim == 1 per the output below).
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
print(state_dim)
print(action_dim)
# Print the box bounds of both spaces for reference.
print(env.observation_space.high)
print(env.observation_space.low)
print(env.action_space.high)
print(env.action_space.low)
2
1
[ 0.6 0.07]
[-1.2 -0.07]
[ 1.]
[-1.]
In [8]:
# Start a TF session with a capped GPU memory fraction, plus a summary writer
# pointed at the experiment's log directory for TensorBoard.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=settings["gpu_memory_fraction"])
session = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
summary_writer = tf.summary.FileWriter(settings["experiment_path"] + "/logdir")
In [9]:
# Build the four networks consumed by DMLAC.  `nn.fully_connected*` signatures are
# defined in the project's nn module; argument roles below are inferred from the
# settings names — confirm against nn.py.
# Actor: state -> action, optionally bounded to the action space.
actor_network = nn.fully_connected("Actor", session, [state_dim], settings["actor_net_layers"],
                                   action_dim, settings["actor_net_activation_fn"],
                                   env.action_space if settings["actor_bounded_output"] else None, False)
# Model: (state, action) -> next state, via a joint input embedding.
model_network = nn.fully_connected_with_input_embedding(
    "Model", session, [state_dim, action_dim], settings["model_net_embedding"], settings["model_net_layers"], state_dim,
    settings["model_net_activation_fn"], None, False)
# Reward: (state, action) -> scalar reward prediction.
reward_network = nn.fully_connected_with_input_embedding(
    "Reward", session, [state_dim, action_dim], settings["reward_net_embedding"], settings["reward_net_layers"], 1,
    settings["reward_net_activation_fn"], None, False)
# Value: state -> scalar state value.
value_network = nn.fully_connected("Value", session, [state_dim], settings["value_net_layers"], 1,
                                   settings["value_net_activation_fn"], None, False)
# Show the actor's layer topology (rendered in the cell output).
print(str(actor_network))
[] --> Actor_input_0
['Actor_input_0'] --> hidden_0_256
['hidden_0_256'] --> hidden_1_128
['hidden_1_128'] --> output
['output'] --> bounding
In [10]:
# Assemble the DMLAC agent from the four networks and the hyperparameters above.
agent = DMLAC(actor_network, model_network, reward_network, value_network,
              forward_steps=settings["forward_steps"],
              discount_factor=settings["discount_factor"],
              trace_decay=settings["trace_decay"],
              actor_tf_optimizer=settings["actor_tf_optimizer"],
              model_tf_optimizer=settings["model_tf_optimizer"],
              reward_tf_optimizer=settings["reward_tf_optimizer"],
              value_tf_optimizer=settings["value_tf_optimizer"],
              actor_l2=settings["actor_l2"],
              model_l2=settings["model_l2"],
              reward_l2=settings["reward_l2"],
              value_l2=settings["value_l2"],
              actor_target_approach_rate=settings["actor_target_approach_rate"],
              value_target_approach_rate=settings["value_target_approach_rate"],
              summary_writer=summary_writer
              )
In [11]:
# Checkpoint saver for all TF variables, and the graph definition for TensorBoard.
saver = tf.train.Saver()
summary_writer.add_graph(session.graph)
In [12]:
# Model-based prioritized replay buffer wired to the agent and environment;
# `episodic=True` semantics are defined in experiencereplay.py — confirm there.
experience_replay = ModelBasedPrioritizedExperienceReplay(agent, env, settings["max_replay_buffer_size"], episodic=True)
In [13]:
# Ornstein-Uhlenbeck noise process used for warm-up exploration in the training loop.
noise = OUNoise(action_dim, seed=settings["noise_seed"])
In [14]:
# Main training loop: act, store the transition, record diagnostics, and train
# once the replay buffer has warmed up.
exp = Experiment(settings["experiment_path"], session, env, settings, settings["render_environment"], settings["render_frequency"], settings["render_start"])
progress_bar = tqdm(total=settings["timesteps"])
e_t = -1  # step index within the current episode (reset on `done`; kept for use by later cells)
# `range` instead of Py2-only `xrange`: works identically on Python 2 and 3
# and the 8000-element list is negligible.
for t in range(settings["timesteps"]):
    e_t += 1
    # Warm-up: act with pure OU noise until `learning_start` transitions exist.
    if t < settings["learning_start"]:
        action = noise.noise()
    else:
        action = agent.action(state)
    observation, reward, done, info = env.step(action)
    next_state = np.reshape(preprocess_state(observation), (state_dim,))
    experience_replay.add_experience(state, action, preprocess_reward(reward), next_state, done)
    # Diagnostics reported for the transition just added.
    td_error = math.fabs(experience_replay.get_last_td_error())
    model_error = experience_replay.get_last_model_error()
    reward_error = experience_replay.get_last_reward_error()
    exp.record(t, state, action, reward, next_state, done, td_error, model_error, reward_error)
    state = next_state
    if done:
        exp.print_last_episode_info()
        observation = env.reset()
        # Reshape to (state_dim,) so the reset state has the same shape as
        # `next_state` above (previously the reset path skipped the reshape).
        state = np.reshape(preprocess_state(observation), (state_dim,))
        e_t = -1
    # Train only after the warm-up phase.
    if t >= settings["learning_start"]:
        experience_replay.train_agent(settings["batch_size"], settings["train_updates_per_step"])
        experience_replay.update_oldest_priorities(settings["priority_updates_per_step"])
    progress_bar.set_description('[{}] reward: {:.2f}, reward 100-step MA: {:.2f}, action: {}, td-error: {:.4f}, model error: {:.4f}, reward error: {:.4f}' \
                                 .format(t, reward, exp.reward_100ma.get_average(), str(action), td_error, model_error, reward_error))
    progress_bar.update()
progress_bar.close()
[174] reward: -0.00, reward 100-step MA: 0.98, action: [-0.07092898], td-error: 0.0000, model error: 0.0031, reward error: 2.0657: 2%|▏ | 175/8000 [00:01<06:54, 18.87it/s] | 1/8000 [00:00<56:34, 2.36it/s]
Total episode reward: 96.4231978292. Finished in 130 steps.
[306] reward: -0.10, reward 100-step MA: -0.05, action: [ 0.98711097], td-error: 25.2040, model error: 0.0001, reward error: 618.8727: 4%|▍ | 307/8000 [00:10<15:50, 8.10it/s]] reward: -0.01, reward 100-step MA: -0.01, action: [-0.3637961], td-error: 0.0000, model error: 0.0507, reward error: 2.5737: 3%|▎ | 234/8000 [00:01<03:35, 36.09it/s]
Total episode reward: 93.5231650728. Finished in 179 steps.
[1695] reward: 99.91, reward 100-step MA: 0.92, action: [ 0.95290232], td-error: 134.8447, model error: 0.0000, reward error: 16067.3672: 21%|██ | 1696/8000 [04:35<20:29, 5.13it/s]] reward: -0.01, reward 100-step MA: 0.95, action: [ 0.23008931], td-error: 0.0000, model error: 0.0000, reward error: 6.6592: 4%|▍ | 321/8000 [00:12<18:29, 6.92it/s]
Total episode reward: -13.8869715649. Finished in 1387 steps.
[1819] reward: 99.99, reward 100-step MA: 0.85, action: [ 0.3119626], td-error: 782.9854, model error: 0.0024, reward error: 12847.1084: 23%|██▎ | 1820/8000 [04:59<19:57, 5.16it/s] [1697] reward: -0.02, reward 100-step MA: 0.92, action: [-0.41467035], td-error: 0.0000, model error: 0.0018, reward error: 154.1618: 21%|██ | 1698/8000 [04:35<20:14, 5.19it/s]
Total episode reward: 84.1286960661. Finished in 124 steps.
[2150] reward: 99.60, reward 100-step MA: 0.89, action: [ 1.98852909], td-error: 543.6807, model error: 0.0001, reward error: 1584.3918: 27%|██▋ | 2151/8000 [06:04<19:17, 5.05it/s][1821] reward: -0.05, reward 100-step MA: 0.85, action: [-0.71537304], td-error: 0.0000, model error: 0.0034, reward error: 1623.8997: 23%|██▎ | 1822/8000 [04:59<19:50, 5.19it/s]
Total episode reward: 63.6965154201. Finished in 331 steps.
[2314] reward: 99.90, reward 100-step MA: 0.94, action: [ 1.01177847], td-error: 0.0000, model error: 0.0017, reward error: 864.4659: 29%|██▉ | 2315/8000 [06:36<18:35, 5.10it/s] [2152] reward: -0.01, reward 100-step MA: 0.89, action: [-0.34694254], td-error: 30.0261, model error: 0.0011, reward error: 3.0154: 27%|██▋ | 2153/8000 [06:04<19:12, 5.07it/s]
Total episode reward: 92.4477668174. Finished in 164 steps.
[2435] reward: 99.87, reward 100-step MA: 0.93, action: [ 1.15491557], td-error: 187.1152, model error: 0.0005, reward error: 59.9050: 30%|███ | 2436/8000 [07:00<18:21, 5.05it/s] 316] reward: -0.08, reward 100-step MA: 0.94, action: [ 0.89678347], td-error: 0.0000, model error: 0.0005, reward error: 23.1924: 29%|██▉ | 2317/8000 [06:37<19:20, 4.90it/s]
Total episode reward: 91.3727454161. Finished in 121 steps.
[2518] reward: 99.83, reward 100-step MA: 1.93, action: [ 1.29782808], td-error: 604.0186, model error: 0.0001, reward error: 4392.6660: 31%|███▏ | 2519/8000 [07:17<18:21, 4.97it/s]437] reward: -0.04, reward 100-step MA: 0.94, action: [-0.61635572], td-error: 0.0000, model error: 0.0013, reward error: 0.5953: 30%|███ | 2438/8000 [07:01<18:14, 5.08it/s]
Total episode reward: 95.0047661277. Finished in 83 steps.
[2659] reward: 99.91, reward 100-step MA: 0.94, action: [ 0.94806051], td-error: 214.4082, model error: 0.0003, reward error: 622.4498: 33%|███▎ | 2660/8000 [07:45<17:38, 5.05it/s] [2520] reward: -0.08, reward 100-step MA: 1.93, action: [-0.90444177], td-error: 40.6895, model error: 0.0005, reward error: 0.0553: 32%|███▏ | 2521/8000 [07:17<18:14, 5.00it/s]
Total episode reward: 91.8093924948. Finished in 141 steps.
[2735] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99506879], td-error: 290.3008, model error: 0.0002, reward error: 399.9548: 34%|███▍ | 2736/8000 [08:00<17:22, 5.05it/s][2661] reward: -0.03, reward 100-step MA: 0.94, action: [-0.53973788], td-error: 0.0000, model error: 0.0021, reward error: 0.0532: 33%|███▎ | 2662/8000 [07:45<17:45, 5.01it/s]
Total episode reward: 94.5837537746. Finished in 76 steps.
[2879] reward: 99.91, reward 100-step MA: 0.95, action: [ 0.9718256], td-error: 942.3574, model error: 0.0000, reward error: 243.0125: 36%|███▌ | 2880/8000 [08:28<17:01, 5.01it/s] 2737] reward: -0.05, reward 100-step MA: 1.92, action: [-0.69772553], td-error: 12.2798, model error: 0.0004, reward error: 0.5264: 34%|███▍ | 2738/8000 [08:00<17:08, 5.12it/s]
Total episode reward: 93.6874602827. Finished in 144 steps.
[2954] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.9912833], td-error: 801.2500, model error: 0.0001, reward error: 1121.4849: 37%|███▋ | 2955/8000 [08:43<16:56, 4.97it/s]2881] reward: -0.07, reward 100-step MA: 0.95, action: [-0.81734598], td-error: 3.5518, model error: 0.0003, reward error: 0.1470: 36%|███▌ | 2882/8000 [08:29<16:50, 5.06it/s]
Total episode reward: 95.2824589813. Finished in 75 steps.
[3099] reward: 99.90, reward 100-step MA: 0.95, action: [ 0.98041534], td-error: 1076.8770, model error: 0.0007, reward error: 272.2931: 39%|███▉ | 3100/8000 [09:12<16:34, 4.93it/s]2956] reward: -0.09, reward 100-step MA: 1.93, action: [-0.94886076], td-error: 46.8257, model error: 0.0001, reward error: 15.4871: 37%|███▋ | 2957/8000 [08:44<17:00, 4.94it/s]
Total episode reward: 93.7055666543. Finished in 145 steps.
[3180] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.98563802], td-error: 284.2695, model error: 0.0007, reward error: 34.6130: 40%|███▉ | 3181/8000 [09:28<16:05, 4.99it/s] [3101] reward: -0.14, reward 100-step MA: 0.95, action: [-1.16526079], td-error: 0.0000, model error: 0.0018, reward error: 0.0202: 39%|███▉ | 3102/8000 [09:13<16:19, 5.00it/s]
Total episode reward: 94.8222190916. Finished in 81 steps.
[3255] reward: 99.90, reward 100-step MA: 1.93, action: [ 1.00092041], td-error: 1437.4727, model error: 0.0002, reward error: 5479.7212: 41%|████ | 3256/8000 [09:43<15:44, 5.02it/s]82] reward: -0.10, reward 100-step MA: 1.93, action: [-0.9911927], td-error: 12.9302, model error: 0.0002, reward error: 0.0260: 40%|███▉ | 3183/8000 [09:29<16:08, 4.97it/s]
Total episode reward: 95.3206187029. Finished in 75 steps.
[3329] reward: -0.10, reward 100-step MA: 0.92, action: [ 0.99487031], td-error: 655.1602, model error: 0.0001, reward error: 40.0274: 42%|████▏ | 3330/8000 [09:58<15:33, 5.00it/s] [3257] reward: -0.11, reward 100-step MA: 1.93, action: [-1.06746531], td-error: 19.7520, model error: 0.0000, reward error: 56.3410: 41%|████ | 3258/8000 [09:44<15:52, 4.98it/s]
Total episode reward: 95.030240833. Finished in 75 steps.
[3409] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98966575], td-error: 503.1699, model error: 0.0003, reward error: 188.7018: 43%|████▎ | 3410/8000 [10:14<15:28, 4.95it/s]][3331] reward: -0.09, reward 100-step MA: 1.92, action: [-0.94382316], td-error: 0.0000, model error: 0.0006, reward error: 0.9238: 42%|████▏ | 3332/8000 [09:59<15:43, 4.95it/s]
Total episode reward: 94.5742948659. Finished in 79 steps.
[3483] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.97474086], td-error: 918.9492, model error: 0.0002, reward error: 6.7982: 44%|████▎ | 3484/8000 [10:29<15:09, 4.97it/s] [3411] reward: -0.11, reward 100-step MA: 1.92, action: [-1.03424931], td-error: 139.5518, model error: 0.0000, reward error: 0.4133: 43%|████▎ | 3412/8000 [10:15<15:19, 4.99it/s]
Total episode reward: 95.0923479071. Finished in 74 steps.
[3631] reward: 99.90, reward 100-step MA: 0.95, action: [ 1.00145221], td-error: 902.7031, model error: 0.0004, reward error: 1497.1527: 45%|████▌ | 3632/8000 [10:59<14:49, 4.91it/s]85] reward: -0.10, reward 100-step MA: 1.92, action: [-0.98203981], td-error: 0.0000, model error: 0.0001, reward error: 0.2068: 44%|████▎ | 3486/8000 [10:30<15:10, 4.96it/s]
Total episode reward: 92.6124776109. Finished in 148 steps.
[3708] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.97811317], td-error: 1086.6914, model error: 0.0006, reward error: 506.8153: 46%|████▋ | 3709/8000 [11:14<14:43, 4.86it/s][3633] reward: -0.11, reward 100-step MA: 0.95, action: [-1.02785206], td-error: 307.9521, model error: 0.0000, reward error: 27.7952: 45%|████▌ | 3634/8000 [10:59<14:56, 4.87it/s]
Total episode reward: 95.1492344202. Finished in 77 steps.
[3787] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.99585497], td-error: 2309.5820, model error: 0.0005, reward error: 65.9612: 47%|████▋ | 3788/8000 [11:30<14:24, 4.87it/s] [3710] reward: -0.11, reward 100-step MA: 1.93, action: [-1.04609823], td-error: 256.3818, model error: 0.0000, reward error: 15.2844: 46%|████▋ | 3711/8000 [11:15<14:37, 4.89it/s]
Total episode reward: 94.8678545986. Finished in 79 steps.
[3949] reward: 99.90, reward 100-step MA: 0.94, action: [ 1.00653851], td-error: 1091.6953, model error: 0.0009, reward error: 582.3171: 49%|████▉ | 3950/8000 [12:03<13:59, 4.83it/s] 89] reward: -0.10, reward 100-step MA: 1.93, action: [-0.98602957], td-error: 307.8262, model error: 0.0001, reward error: 0.4177: 47%|████▋ | 3790/8000 [11:31<14:01, 5.00it/s]
Total episode reward: 90.3256358579. Finished in 162 steps.
[4029] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98892784], td-error: 2208.8516, model error: 0.0003, reward error: 1473.5466: 50%|█████ | 4030/8000 [12:19<13:27, 4.91it/s]3951] reward: -0.11, reward 100-step MA: 0.94, action: [-1.06735146], td-error: 108.5859, model error: 0.0001, reward error: 6.5596: 49%|████▉ | 3952/8000 [12:03<13:45, 4.91it/s]
Total episode reward: 94.2026321658. Finished in 80 steps.
[4103] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99534035], td-error: 1083.8281, model error: 0.0007, reward error: 339.0142: 51%|█████▏ | 4104/8000 [12:34<13:13, 4.91it/s] [4031] reward: -0.11, reward 100-step MA: 1.92, action: [-1.03191257], td-error: 35.2227, model error: 0.0001, reward error: 0.4022: 50%|█████ | 4032/8000 [12:20<13:18, 4.97it/s]
Total episode reward: 94.4505720697. Finished in 74 steps.
[4176] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.9959271], td-error: 4162.9219, model error: 0.0002, reward error: 1242.0801: 52%|█████▏ | 4177/8000 [12:49<12:38, 5.04it/s][4105] reward: -0.11, reward 100-step MA: 1.92, action: [-1.06371951], td-error: 100.0859, model error: 0.0001, reward error: 4.1703: 51%|█████▏ | 4106/8000 [12:35<13:03, 4.97it/s]
Total episode reward: 94.6139781381. Finished in 73 steps.
[4248] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99742329], td-error: 3727.8125, model error: 0.0001, reward error: 209.8219: 53%|█████▎ | 4249/8000 [13:04<12:31, 4.99it/s][4178] reward: -0.11, reward 100-step MA: 1.92, action: [-1.04347324], td-error: 292.0566, model error: 0.0002, reward error: 0.3191: 52%|█████▏ | 4179/8000 [12:49<12:43, 5.00it/s]
Total episode reward: 94.5663274728. Finished in 72 steps.
[4319] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00802863], td-error: 2581.1562, model error: 0.0003, reward error: 226.0249: 54%|█████▍ | 4320/8000 [13:17<10:47, 5.69it/s][4250] reward: -0.12, reward 100-step MA: 1.92, action: [-1.10661185], td-error: 234.4160, model error: 0.0000, reward error: 0.5828: 53%|█████▎ | 4251/8000 [13:04<12:36, 4.95it/s]
Total episode reward: 94.3286504071. Finished in 71 steps.
[4394] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98692739], td-error: 3062.4141, model error: 0.0001, reward error: 36.8419: 55%|█████▍ | 4395/8000 [13:30<10:38, 5.65it/s] [4321] reward: -0.07, reward 100-step MA: 1.92, action: [-0.83494484], td-error: 0.0000, model error: 0.0001, reward error: 0.5059: 54%|█████▍ | 4322/8000 [13:17<10:42, 5.73it/s]
Total episode reward: 94.0069662674. Finished in 75 steps.
[4465] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97635305], td-error: 3472.7969, model error: 0.0001, reward error: 0.1824: 56%|█████▌ | 4466/8000 [13:43<10:42, 5.50it/s] [4396] reward: -0.12, reward 100-step MA: 1.92, action: [-1.10470295], td-error: 358.2227, model error: 0.0000, reward error: 0.9168: 55%|█████▍ | 4397/8000 [13:31<10:29, 5.72it/s]
Total episode reward: 94.3249927012. Finished in 71 steps.
[4537] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.02158332], td-error: 1359.8125, model error: 0.0003, reward error: 143.6627: 57%|█████▋ | 4538/8000 [13:56<10:13, 5.64it/s]467] reward: -0.09, reward 100-step MA: 1.91, action: [-0.94578326], td-error: 0.0000, model error: 0.0002, reward error: 2.2365: 56%|█████▌ | 4468/8000 [13:43<10:40, 5.51it/s]
Total episode reward: 94.1926985075. Finished in 72 steps.
[4609] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98690319], td-error: 4752.4531, model error: 0.0000, reward error: 0.9344: 58%|█████▊ | 4610/8000 [14:08<09:56, 5.68it/s] [4539] reward: -0.08, reward 100-step MA: 1.91, action: [-0.90795952], td-error: 47.3906, model error: 0.0002, reward error: 0.0458: 57%|█████▋ | 4540/8000 [13:56<10:08, 5.69it/s]
Total episode reward: 94.0130174927. Finished in 72 steps.
[4680] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98882532], td-error: 3905.5938, model error: 0.0002, reward error: 14.7974: 59%|█████▊ | 4681/8000 [14:21<09:46, 5.66it/s]4611] reward: -0.10, reward 100-step MA: 1.91, action: [-1.00080895], td-error: 0.0000, model error: 0.0003, reward error: 0.4150: 58%|█████▊ | 4612/8000 [14:09<09:55, 5.69it/s]
Total episode reward: 93.7941235496. Finished in 71 steps.
[4750] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00387061], td-error: 5898.9062, model error: 0.0002, reward error: 19.8732: 59%|█████▉ | 4751/8000 [14:33<09:33, 5.67it/s][4682] reward: -0.11, reward 100-step MA: 1.91, action: [-1.02825725], td-error: 544.4648, model error: 0.0000, reward error: 0.0009: 59%|█████▊ | 4683/8000 [14:21<09:42, 5.70it/s]
Total episode reward: 93.7409767543. Finished in 70 steps.
[4821] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98664117], td-error: 7844.2812, model error: 0.0001, reward error: 116.7266: 60%|██████ | 4822/8000 [14:46<09:18, 5.69it/s]4752] reward: -0.12, reward 100-step MA: 1.91, action: [-1.08858573], td-error: 1490.3945, model error: 0.0000, reward error: 7.6123: 59%|█████▉ | 4753/8000 [14:34<09:38, 5.61it/s]
Total episode reward: 94.101249705. Finished in 71 steps.
[4897] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99818778], td-error: 8098.2188, model error: 0.0002, reward error: 0.3148: 61%|██████ | 4898/8000 [14:59<09:13, 5.60it/s] [4823] reward: -0.10, reward 100-step MA: 1.91, action: [-1.02244425], td-error: 941.8750, model error: 0.0000, reward error: 1.5026: 60%|██████ | 4824/8000 [14:46<09:17, 5.70it/s]
Total episode reward: 93.3849713019. Finished in 76 steps.
[4973] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99267745], td-error: 0.0000, model error: 0.0007, reward error: 209.5989: 62%|██████▏ | 4974/8000 [15:13<08:55, 5.65it/s] 899] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01576245], td-error: 132.7969, model error: 0.0001, reward error: 2.7258: 61%|██████▏ | 4900/8000 [15:00<09:11, 5.63it/s]
Total episode reward: 93.1590904891. Finished in 76 steps.
[5045] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00971055], td-error: 725.8438, model error: 0.0005, reward error: 1.1238: 63%|██████▎ | 5046/8000 [15:26<08:55, 5.52it/s] 975] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01912498], td-error: 1398.4453, model error: 0.0001, reward error: 0.0080: 62%|██████▏ | 4976/8000 [15:13<09:04, 5.56it/s]
Total episode reward: 93.8844160348. Finished in 72 steps.
[5118] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9998523], td-error: 10294.0312, model error: 0.0001, reward error: 32.2812: 64%|██████▍ | 5119/8000 [15:39<08:37, 5.57it/s] 47] reward: -0.10, reward 100-step MA: 1.91, action: [-0.98514611], td-error: 0.0000, model error: 0.0002, reward error: 0.0349: 63%|██████▎ | 5048/8000 [15:26<08:55, 5.51it/s]
Total episode reward: 93.4291041701. Finished in 73 steps.
[5189] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99092078], td-error: 5854.1875, model error: 0.0003, reward error: 111.8748: 65%|██████▍ | 5190/8000 [15:51<08:20, 5.62it/s]5120] reward: -0.10, reward 100-step MA: 1.91, action: [-0.98187017], td-error: 981.4922, model error: 0.0000, reward error: 0.2890: 64%|██████▍ | 5121/8000 [15:39<08:30, 5.64it/s]
Total episode reward: 93.6595448984. Finished in 71 steps.
[5260] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00127995], td-error: 1309.2500, model error: 0.0006, reward error: 6.6576: 66%|██████▌ | 5261/8000 [16:04<08:06, 5.63it/s] [5191] reward: -0.08, reward 100-step MA: 1.91, action: [-0.87972558], td-error: 533.0391, model error: 0.0001, reward error: 13.9041: 65%|██████▍ | 5192/8000 [15:52<08:15, 5.67it/s]
Total episode reward: 93.7073365383. Finished in 71 steps.
[5330] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.01399302], td-error: 5317.9688, model error: 0.0002, reward error: 0.5838: 67%|██████▋ | 5331/8000 [16:16<07:59, 5.57it/s] 262] reward: -0.06, reward 100-step MA: 1.91, action: [-0.78802913], td-error: 0.0000, model error: 0.0004, reward error: 0.4686: 66%|██████▌ | 5263/8000 [16:04<08:07, 5.62it/s]
Total episode reward: 93.7653968748. Finished in 70 steps.
[5401] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97575021], td-error: 24652.0000, model error: 0.0002, reward error: 101.4299: 68%|██████▊ | 5402/8000 [16:29<07:48, 5.54it/s]32] reward: -0.11, reward 100-step MA: 1.91, action: [-1.04186809], td-error: 2283.7578, model error: 0.0000, reward error: 0.0243: 67%|██████▋ | 5333/8000 [16:17<07:55, 5.61it/s]
Total episode reward: 93.6519493879. Finished in 71 steps.
[5497] reward: 99.99, reward 100-step MA: 1.94, action: [ 0.33883393], td-error: 0.0000, model error: 0.0002, reward error: 3022.5708: 69%|██████▊ | 5498/8000 [16:46<07:26, 5.60it/s] [5403] reward: -0.02, reward 100-step MA: 1.91, action: [-0.42040992], td-error: 0.0000, model error: 0.0000, reward error: 0.3995: 68%|██████▊ | 5404/8000 [16:29<07:46, 5.57it/s]
Total episode reward: 94.2030324598. Finished in 96 steps.
[5569] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99711835], td-error: 19448.1875, model error: 0.0001, reward error: 64.4392: 70%|██████▉ | 5570/8000 [16:59<07:16, 5.57it/s]499] reward: -0.05, reward 100-step MA: 1.94, action: [-0.70899582], td-error: 0.0000, model error: 0.0001, reward error: 4.5246: 69%|██████▉ | 5500/8000 [16:46<07:24, 5.62it/s]
Total episode reward: 93.5501463559. Finished in 72 steps.
[5642] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.021644], td-error: 17567.5625, model error: 0.0001, reward error: 49.9054: 71%|███████ | 5643/8000 [17:12<07:02, 5.58it/s] [5571] reward: -0.11, reward 100-step MA: 1.91, action: [-1.02588367], td-error: 3794.4844, model error: 0.0000, reward error: 1.4887: 70%|██████▉ | 5572/8000 [16:59<07:12, 5.62it/s]
Total episode reward: 93.389277264. Finished in 73 steps.
[5715] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99834955], td-error: 16039.4375, model error: 0.0000, reward error: 16.5300: 71%|███████▏ | 5716/8000 [17:25<06:47, 5.61it/s]644] reward: -0.04, reward 100-step MA: 1.91, action: [-0.62854755], td-error: 0.0000, model error: 0.0000, reward error: 0.0202: 71%|███████ | 5645/8000 [17:12<07:00, 5.59it/s]
Total episode reward: 93.380181634. Finished in 73 steps.
[5787] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00111783], td-error: 39353.5000, model error: 0.0004, reward error: 79.6178: 72%|███████▏ | 5788/8000 [17:38<06:38, 5.55it/s][5717] reward: -0.11, reward 100-step MA: 1.91, action: [-1.05010939], td-error: 0.0000, model error: 0.0009, reward error: 0.0698: 71%|███████▏ | 5718/8000 [17:25<06:47, 5.59it/s]
Total episode reward: 93.339612244. Finished in 72 steps.
[5859] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00086856], td-error: 17437.3125, model error: 0.0001, reward error: 0.3788: 73%|███████▎ | 5860/8000 [17:51<06:27, 5.52it/s] [5789] reward: -0.09, reward 100-step MA: 1.91, action: [-0.93721908], td-error: 1578.4375, model error: 0.0000, reward error: 2.3280: 72%|███████▏ | 5790/8000 [17:38<06:33, 5.62it/s]
Total episode reward: 93.4486696729. Finished in 72 steps.
[5931] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00103521], td-error: 20909.2500, model error: 0.0002, reward error: 0.1838: 74%|███████▍ | 5932/8000 [18:04<06:10, 5.59it/s] 5861] reward: -0.00, reward 100-step MA: 1.91, action: [-0.22270525], td-error: 0.0000, model error: 0.0000, reward error: 0.1423: 73%|███████▎ | 5862/8000 [17:51<06:28, 5.51it/s]
Total episode reward: 93.4269928606. Finished in 72 steps.
[6002] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00126088], td-error: 23932.3750, model error: 0.0001, reward error: 0.0883: 75%|███████▌ | 6003/8000 [18:16<06:08, 5.42it/s] 5933] reward: -0.11, reward 100-step MA: 1.91, action: [-1.04868412], td-error: 4970.4219, model error: 0.0001, reward error: 0.1952: 74%|███████▍ | 5934/8000 [18:04<06:12, 5.55it/s]
Total episode reward: 93.3333530103. Finished in 71 steps.
[6114] reward: 99.90, reward 100-step MA: 0.92, action: [ 1.01420605], td-error: 23040.3750, model error: 0.0004, reward error: 2.1225: 76%|███████▋ | 6115/8000 [18:36<05:38, 5.58it/s] 004] reward: -0.01, reward 100-step MA: 1.91, action: [-0.27053785], td-error: 0.0000, model error: 0.0001, reward error: 5.3422: 75%|███████▌ | 6005/8000 [18:17<06:10, 5.39it/s]
Total episode reward: 91.6741434448. Finished in 112 steps.
[6188] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98921239], td-error: 25949.4375, model error: 0.0003, reward error: 75.4884: 77%|███████▋ | 6189/8000 [18:50<05:29, 5.50it/s]6116] reward: -0.03, reward 100-step MA: 0.92, action: [-0.51721132], td-error: 0.0000, model error: 0.0000, reward error: 7.7200: 76%|███████▋ | 6117/8000 [18:37<05:41, 5.51it/s]
Total episode reward: 93.5686943771. Finished in 74 steps.
[6260] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.01850045], td-error: 38120.8750, model error: 0.0000, reward error: 78.2413: 78%|███████▊ | 6261/8000 [19:03<05:10, 5.60it/s][6190] reward: -0.11, reward 100-step MA: 1.91, action: [-1.03821003], td-error: 4561.7812, model error: 0.0000, reward error: 0.6163: 77%|███████▋ | 6191/8000 [18:50<05:28, 5.50it/s]
Total episode reward: 93.1345810918. Finished in 72 steps.
[6331] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9989115], td-error: 39253.7500, model error: 0.0001, reward error: 3.2541: 79%|███████▉ | 6332/8000 [19:16<05:00, 5.55it/s] [6262] reward: -0.05, reward 100-step MA: 1.90, action: [-0.70389664], td-error: 0.0000, model error: 0.0001, reward error: 0.2201: 78%|███████▊ | 6263/8000 [19:03<05:10, 5.60it/s]
Total episode reward: 93.4222280927. Finished in 71 steps.
[6403] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97811031], td-error: 22837.6250, model error: 0.0001, reward error: 1650.8687: 80%|████████ | 6404/8000 [19:29<04:50, 5.50it/s]3] reward: -0.09, reward 100-step MA: 1.91, action: [-0.93731892], td-error: 2974.4062, model error: 0.0002, reward error: 2.7135: 79%|███████▉ | 6334/8000 [19:16<04:58, 5.58it/s]
Total episode reward: 93.3922931207. Finished in 72 steps.
[6474] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00635314], td-error: 18923.6250, model error: 0.0005, reward error: 12.2901: 81%|████████ | 6475/8000 [19:41<04:34, 5.55it/s] [6405] reward: -0.09, reward 100-step MA: 1.91, action: [-0.94413006], td-error: 178.2500, model error: 0.0000, reward error: 2.6618: 80%|████████ | 6406/8000 [19:29<04:49, 5.51it/s]
Total episode reward: 93.4017595511. Finished in 71 steps.
[6545] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00275207], td-error: 32210.8750, model error: 0.0002, reward error: 67.2621: 82%|████████▏ | 6546/8000 [19:54<04:23, 5.51it/s][6476] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01411188], td-error: 4951.7812, model error: 0.0000, reward error: 0.2380: 81%|████████ | 6477/8000 [19:42<04:32, 5.58it/s]
Total episode reward: 93.2676075274. Finished in 71 steps.
[6617] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00184715], td-error: 0.0000, model error: 0.0021, reward error: 9.8404: 83%|████████▎ | 6618/8000 [20:07<04:09, 5.54it/s] [6547] reward: -0.10, reward 100-step MA: 1.90, action: [-1.01907206], td-error: 2401.1875, model error: 0.0004, reward error: 0.0280: 82%|████████▏ | 6548/8000 [19:55<04:23, 5.51it/s]
Total episode reward: 93.2235003289. Finished in 72 steps.
[6689] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00427401], td-error: 30074.0000, model error: 0.0003, reward error: 2516.7756: 84%|████████▎ | 6690/8000 [20:20<03:58, 5.49it/s]reward: -0.10, reward 100-step MA: 1.90, action: [-1.02055562], td-error: 12882.8125, model error: 0.0002, reward error: 0.7609: 83%|████████▎ | 6620/8000 [20:08<04:05, 5.61it/s]
Total episode reward: 93.3046003025. Finished in 72 steps.
[6764] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00379801], td-error: 299299.5000, model error: 0.0065, reward error: 53.2323: 85%|████████▍ | 6765/8000 [20:34<03:46, 5.45it/s] [6691] reward: -0.01, reward 100-step MA: 1.91, action: [-0.32090044], td-error: 0.0000, model error: 0.0000, reward error: 0.2007: 84%|████████▎ | 6692/8000 [20:21<03:55, 5.56it/s]
Total episode reward: 93.7202089961. Finished in 75 steps.
[6835] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.01534021], td-error: 0.0000, model error: 0.0011, reward error: 0.6105: 85%|████████▌ | 6836/8000 [20:47<03:35, 5.39it/s] [6766] reward: -0.07, reward 100-step MA: 1.91, action: [-0.86211205], td-error: 0.0000, model error: 0.0005, reward error: 0.0259: 85%|████████▍ | 6767/8000 [20:34<03:45, 5.46it/s]
Total episode reward: 93.3041615159. Finished in 71 steps.
[6940] reward: 99.90, reward 100-step MA: 0.91, action: [ 1.00993085], td-error: 19347.2500, model error: 0.0003, reward error: 155.5727: 87%|████████▋ | 6941/8000 [21:06<03:14, 5.45it/s] reward: -0.01, reward 100-step MA: 1.91, action: [ 0.25121629], td-error: 0.0000, model error: 0.0000, reward error: 0.0066: 85%|████████▌ | 6838/8000 [20:47<03:31, 5.49it/s]
Total episode reward: 91.0947722998. Finished in 105 steps.
[7045] reward: 99.90, reward 100-step MA: 0.90, action: [ 0.99838042], td-error: 0.0000, model error: 0.0011, reward error: 42.3520: 88%|████████▊ | 7046/8000 [21:26<02:55, 5.45it/s] 6942] reward: -0.02, reward 100-step MA: 0.91, action: [ 0.4990654], td-error: 0.0000, model error: 0.0001, reward error: 1.5038: 87%|████████▋ | 6943/8000 [21:06<03:12, 5.50it/s]
Total episode reward: 90.2636806126. Finished in 105 steps.
[7154] reward: 99.89, reward 100-step MA: 0.91, action: [ 1.03434765], td-error: 28368.2500, model error: 0.0016, reward error: 3.0978: 89%|████████▉ | 7155/8000 [21:46<02:35, 5.42it/s] 7] reward: -0.01, reward 100-step MA: 0.91, action: [ 0.35400629], td-error: 0.0000, model error: 0.0001, reward error: 1.9994: 88%|████████▊ | 7048/8000 [21:26<02:52, 5.53it/s]
Total episode reward: 90.5581873064. Finished in 109 steps.
[7231] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00807548], td-error: 28910.0000, model error: 0.0004, reward error: 28.0701: 90%|█████████ | 7232/8000 [22:00<02:21, 5.41it/s]7156] reward: -0.00, reward 100-step MA: 0.91, action: [ 0.16145456], td-error: 0.0000, model error: 0.0000, reward error: 3.0972: 89%|████████▉ | 7157/8000 [21:46<02:32, 5.52it/s]
Total episode reward: 92.2672517492. Finished in 77 steps.
[7339] reward: 99.90, reward 100-step MA: 0.92, action: [ 0.97473192], td-error: 83867.0000, model error: 0.0004, reward error: 59.1217: 92%|█████████▏| 7340/8000 [22:20<02:01, 5.41it/s] 7233] reward: -0.06, reward 100-step MA: 1.90, action: [ 0.77996969], td-error: 11150.1250, model error: 0.0001, reward error: 0.0113: 90%|█████████ | 7234/8000 [22:00<02:21, 5.42it/s]
Total episode reward: 90.9824652727. Finished in 108 steps.
[7415] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00547302], td-error: 45716.2500, model error: 0.0003, reward error: 4.2762: 93%|█████████▎| 7416/8000 [22:34<01:47, 5.42it/s] [7341] reward: -0.01, reward 100-step MA: 0.92, action: [ 0.24372602], td-error: 0.0000, model error: 0.0000, reward error: 0.7508: 92%|█████████▏| 7342/8000 [22:20<02:00, 5.44it/s]
Total episode reward: 93.4434528524. Finished in 76 steps.
[7522] reward: 99.90, reward 100-step MA: 0.92, action: [ 1.00021493], td-error: 65563.0000, model error: 0.0004, reward error: 77.1325: 94%|█████████▍| 7523/8000 [22:53<01:27, 5.46it/s] 417] reward: -0.03, reward 100-step MA: 1.91, action: [ 0.54996085], td-error: 0.0000, model error: 0.0001, reward error: 0.1301: 93%|█████████▎| 7418/8000 [22:34<01:48, 5.38it/s]
Total episode reward: 91.3302062892. Finished in 107 steps.
[7595] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00071621], td-error: 53430.5000, model error: 0.0005, reward error: 7.6382: 95%|█████████▍| 7596/8000 [23:07<01:14, 5.40it/s] [7524] reward: -0.03, reward 100-step MA: 0.92, action: [-0.51825792], td-error: 0.0000, model error: 0.0007, reward error: 0.0562: 94%|█████████▍| 7525/8000 [22:54<01:26, 5.52it/s]
Total episode reward: 92.8676053751. Finished in 73 steps.
[7668] reward: 99.90, reward 100-step MA: 1.90, action: [ 0.97726476], td-error: 14471.0000, model error: 0.0007, reward error: 0.0863: 96%|█████████▌| 7669/8000 [23:20<01:01, 5.42it/s] 7597] reward: -0.01, reward 100-step MA: 1.90, action: [-0.3307277], td-error: 0.0000, model error: 0.0001, reward error: 0.2370: 95%|█████████▍| 7598/8000 [23:07<01:13, 5.45it/s]
Total episode reward: 92.749057066. Finished in 73 steps.
[7776] reward: 99.90, reward 100-step MA: 0.91, action: [ 1.01603138], td-error: 53074.0000, model error: 0.0010, reward error: 0.2535: 97%|█████████▋| 7777/8000 [23:40<00:41, 5.38it/s] 70] reward: -0.02, reward 100-step MA: 1.90, action: [ 0.40066624], td-error: 0.0000, model error: 0.0002, reward error: 0.6419: 96%|█████████▌| 7671/8000 [23:20<01:00, 5.40it/s]
Total episode reward: 90.8966227119. Finished in 108 steps.
[7849] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9964174], td-error: 66853.5000, model error: 0.0006, reward error: 41.5829: 98%|█████████▊| 7850/8000 [23:53<00:24, 6.07it/s] 7778] reward: -0.03, reward 100-step MA: 0.91, action: [-0.56303269], td-error: 6170.7500, model error: 0.0000, reward error: 0.2120: 97%|█████████▋| 7779/8000 [23:40<00:41, 5.38it/s]
Total episode reward: 93.4664236065. Finished in 73 steps.
[7960] reward: 99.90, reward 100-step MA: 0.91, action: [ 0.99759507], td-error: 67824.0000, model error: 0.0005, reward error: 27.7434: 100%|█████████▉| 7961/8000 [24:11<00:06, 5.88it/s] 51] reward: -0.01, reward 100-step MA: 1.91, action: [ 0.24447751], td-error: 0.0000, model error: 0.0000, reward error: 1.6037: 98%|█████████▊| 7852/8000 [23:53<00:24, 6.03it/s]
Total episode reward: 90.7968656336. Finished in 111 steps.
[7999] reward: -0.00, reward 100-step MA: 0.91, action: [ 0.14601552], td-error: 0.0000, model error: 0.0001, reward error: 0.0480: 100%|██████████| 8000/8000 [24:18<00:00, 6.17it/s] [7962] reward: -0.00, reward 100-step MA: 0.92, action: [-0.17932951], td-error: 0.0000, model error: 0.0002, reward error: 2.0172: 100%|█████████▉| 7963/8000 [24:11<00:06, 5.89it/s]
In [15]:
# Persist the experiment's recorded data to disk under exp.path
# (the timestamped directory created in the settings cell above).
exp.save()
print("Experiment results saved in " + exp.path)
Experiment results saved in experiments/experiment_dmlac_MountainCarContinuous-v0_1495409405
In [16]:
exp.plot_cumulative_reward()
Out[16]:
[<matplotlib.lines.Line2D at 0x7fd6870fcc50>]
In [17]:
exp.plot_reward()
Out[17]:
[<matplotlib.lines.Line2D at 0x7fd6870fcc10>]
In [18]:
exp.plot_td_error()
Out[18]:
[<matplotlib.lines.Line2D at 0x7fd68754ffd0>]
In [19]:
exp.plot_model_error(skip_steps=settings["learning_start"]+10)
Out[19]:
[<matplotlib.lines.Line2D at 0x7fd6cc13abd0>]
In [20]:
exp.plot_reward_error(skip_steps=settings["learning_start"]+10)
Out[20]:
[<matplotlib.lines.Line2D at 0x7fd687319850>]
In [21]:
exp.plot_episode_reward()
Out[21]:
[<matplotlib.lines.Line2D at 0x7fd6871def50>]
In [22]:
exp.plot_episode_duration()
Out[22]:
[<matplotlib.lines.Line2D at 0x7fd682863190>]
In [23]:
# Replay captured environment frames as an animated GIF — only meaningful
# when rendering was enabled for this run (render_environment=True in
# settings, rendering every 10th episode from step 7000).
# NOTE(review): the export has stripped the indentation of the call below;
# in the original notebook cell it is the body of the if-statement.
if settings["render_environment"]:
exp.display_frames_as_gif()
In [24]:
#session.close()
In [25]:
exp.print_all_tf_variables()
(u'Actor_hidden_0_256_W:0', array([[ 3.10449123e-01, -8.30250323e-01, 9.42699611e-01,
3.77484947e-01, 4.49805200e-01, -6.36179745e-01,
1.15343511e-01, -6.44223213e-01, 3.69558215e-01,
-4.33310539e-01, 2.73950487e-01, -4.33384120e-01,
4.97410417e-01, -1.01236284e+00, -1.06961274e+00,
8.49069238e-01, 2.02725339e+00, -6.94160819e-01,
2.32811853e-01, 8.26460540e-01, -4.79767799e-01,
1.45014729e-02, -9.26192403e-01, -8.38630021e-01,
-6.60115004e-01, -3.48641157e-01, 5.71705520e-01,
-7.83687830e-01, -3.79593939e-01, 8.58444273e-01,
9.39537525e-01, 6.41536236e-01, -6.52774513e-01,
2.23633885e+00, 1.42488807e-01, -1.46013528e-01,
-5.02676189e-01, 8.99448574e-01, 8.86535048e-02,
-7.95273423e-01, -9.53532979e-02, -3.82327825e-01,
2.59257585e-01, -1.67254496e+00, 6.52680218e-01,
-5.26656657e-02, -4.46071893e-01, -2.09293410e-01,
-4.37916130e-01, 3.03620934e-01, 2.13052541e-01,
4.79669631e-01, -8.36777210e-01, -9.73704457e-03,
1.03103466e-01, -1.75322998e+00, 1.38333046e+00,
-2.07181647e-01, -6.89368069e-01, 1.73870504e-01,
4.70507473e-01, -8.17631066e-01, 1.84106958e+00,
-5.70603490e-01, 2.37111136e-01, 2.55336255e-01,
3.67859781e-01, -3.16298068e-01, 3.83851677e-02,
6.24049783e-01, -1.42319906e+00, 1.48202166e-01,
1.49342418e-01, 3.70001853e-01, -1.40107796e-01,
7.73420036e-01, -2.83770263e-01, -6.50652051e-01,
7.59938121e-01, 1.53196100e-02, -5.87954879e-01,
-1.48905382e-01, 1.48286867e+00, 8.16242814e-01,
8.87747765e-01, 3.46297354e-01, -3.37075919e-01,
-5.45897782e-01, -5.93766980e-02, -5.12390137e-01,
6.47911072e-01, -2.22436875e-01, -3.63669842e-01,
-3.67496431e-01, -6.38475299e-01, 1.77550599e-01,
-3.23514581e-01, 4.80933428e-01, 1.07571018e+00,
4.24357742e-01, -1.38386548e+00, -4.30373639e-01,
-6.20666623e-01, 1.19751059e-01, 9.49941099e-01,
-6.25970662e-01, -1.03036189e+00, 1.52562499e+00,
1.55439600e-01, 7.24767298e-02, -1.33926535e+00,
2.98655659e-01, -3.40606183e-01, -2.78280616e-01,
-4.37145960e-03, 5.58452029e-03, 5.62004447e-01,
1.87630832e+00, -6.21275365e-01, -1.05637050e+00,
7.73365572e-02, 6.57335937e-01, -5.08509755e-01,
6.36917770e-01, -7.48391807e-01, -2.77751565e-01,
-7.99452424e-01, 1.00904934e-01, 3.70112360e-01,
1.07833219e+00, 3.68626118e-02, 5.10026515e-01,
2.39380226e-01, -3.70802402e-01, 2.12347761e-01,
-6.37997016e-02, 5.26171267e-01, 2.18239158e-01,
2.57296443e-01, -2.41112486e-01, 7.29543149e-01,
5.29834867e-01, 4.87247586e-01, -7.24941850e-01,
3.16994153e-02, -2.11869389e-01, 6.27519906e-01,
-1.00062989e-01, -4.70543742e-01, -1.21216893e+00,
1.43831384e+00, 1.30912983e+00, 5.44346264e-03,
-3.80148232e-01, 7.83552825e-01, 7.04818010e-01,
-2.71088719e-01, 2.03143954e+00, 9.94931698e-01,
-7.37361908e-02, 3.16743106e-01, 2.46313453e-01,
-1.21103835e+00, -4.39995974e-01, -2.19813392e-01,
-5.03776193e-01, -1.03917408e+00, 2.66602397e-01,
1.63534850e-01, -2.88906425e-01, 4.15222853e-01,
-5.21876931e-01, -1.41412950e+00, -1.00904989e+00,
1.16229069e+00, -3.75472575e-01, -1.00550735e+00,
5.24826825e-01, 7.95499206e-01, 8.22876841e-02,
-1.49883759e+00, 5.00100136e-01, 8.02434504e-01,
-3.61618638e-01, 4.64179307e-01, -8.93782675e-01,
4.99556363e-01, 4.02611606e-02, -6.65707767e-01,
1.79630792e+00, 6.41576409e-01, 1.83129162e-01,
7.27805495e-02, 2.18539342e-01, 5.71363926e-01,
-3.85818988e-01, 1.46925223e+00, 3.15571040e-01,
4.09021050e-01, 5.53112805e-01, 1.91366240e-01,
-4.69458789e-01, 5.08951604e-01, 4.69405174e-01,
1.75137985e-02, -1.14242423e+00, -9.83097970e-01,
1.95617899e-01, -1.93564463e-02, -3.60637099e-01,
-4.15164679e-01, 3.03075373e-01, -4.37166512e-01,
4.38012034e-01, -5.57291627e-01, 2.99865037e-01,
2.60716826e-01, -3.56903493e-01, 1.46739995e+00,
-6.21088445e-01, -1.20839633e-01, 1.53859556e+00,
-6.54112399e-02, -7.01172650e-02, 4.92060363e-01,
8.78054649e-02, 5.93132913e-01, 1.13010705e-02,
-7.77364001e-02, -8.58477354e-01, 7.16403306e-01,
2.42307007e-01, -5.58180809e-01, -5.83780706e-01,
-6.03426814e-01, 1.38863981e+00, 4.60923314e-01,
-3.80864948e-01, 7.19509125e-02, 1.67480302e+00,
-1.98604837e-02, 1.42359093e-01, 5.32993853e-01,
4.92212534e-01, -4.46462750e-01, -7.50971377e-01,
2.68843174e-01, 1.63841414e+00, 7.35512733e-01,
4.21109438e-01, -4.05422330e-01, -6.72171414e-01,
-5.43919981e-01, 9.54858780e-01, 9.75027084e-01,
-1.97575331e-01],
[ -1.51592624e+00, -7.49542892e-01, 6.11999154e-01,
-6.62099957e-01, 4.31255698e-02, 2.01261306e+00,
4.19409901e-01, 1.04195487e+00, 1.11761324e-01,
-8.06235135e-01, -6.83436632e-01, 7.24296272e-01,
-1.94077921e+00, -1.28406638e-04, -2.92900592e-01,
-1.31146979e+00, -6.02529012e-02, -6.41608192e-03,
1.02498174e+00, 1.46659172e+00, 1.99112415e-01,
3.84539485e-01, -7.37488687e-01, 1.51807320e+00,
1.42199552e+00, -8.22976872e-05, -4.01701301e-01,
-1.98313877e-01, -1.28167462e+00, 3.05877626e-02,
-4.01048332e-01, -1.07658875e+00, -1.48820651e+00,
3.08603734e-01, 5.99196434e-01, 3.73436421e-01,
2.34998941e+00, -7.82342494e-01, -8.45778942e-01,
1.60674846e+00, 4.42735367e-02, -6.29005849e-01,
1.16891468e+00, 5.23077369e-01, -6.09504759e-01,
1.11510026e+00, 4.07289058e-01, -1.04694486e+00,
-2.26057601e+00, 2.49116421e+00, 1.12088704e+00,
-1.11294188e-01, 1.27260947e+00, -2.55220056e-01,
-1.21962345e+00, -8.16752732e-01, 6.53686702e-01,
-8.95889997e-01, 1.99894989e+00, 5.58066428e-01,
1.79874980e+00, 1.56600475e+00, 6.05083227e-01,
1.71152011e-01, 1.65872052e-01, 1.11960940e-01,
8.23391140e-01, -2.43463472e-01, -2.16933906e-01,
1.38933659e+00, -1.81411669e-01, -3.98462564e-01,
9.68169212e-01, -1.89817631e+00, -2.45641410e-01,
-4.93652999e-01, -1.53802204e+00, 7.97797814e-02,
-1.48996726e-01, -6.66978657e-01, 1.65711805e-01,
1.53682029e+00, 2.49216080e-01, -7.59747863e-01,
7.52050355e-02, -4.96835113e-01, 1.65724512e-02,
-1.52182436e+00, 9.23890352e-01, 3.93407911e-01,
-3.28310549e-01, -8.86061668e-01, -1.05737221e+00,
2.45505071e+00, -8.00789237e-01, 8.94858181e-01,
-4.11370128e-01, -1.55171371e+00, 5.08352101e-01,
-9.53387499e-01, 1.13606775e+00, -4.20957923e-01,
7.53366113e-01, -6.95217907e-01, 3.22560906e-01,
-1.41082987e-01, 7.30462611e-01, 7.67465830e-01,
2.40835235e-01, -9.43944871e-01, 6.22783124e-01,
-1.37055740e-01, -2.18182278e+00, 2.30365485e-01,
2.86235541e-01, 6.37093306e-01, 1.03475785e+00,
2.02112570e-01, -4.49046403e-01, -1.62055540e+00,
-8.57153594e-01, 1.26699734e+00, 3.73446077e-01,
-3.27348918e-01, -1.80117440e+00, -3.20480093e-02,
-2.04608098e-01, -1.44677329e+00, 3.05840671e-01,
1.84375420e-01, -8.95217896e-01, -1.06735384e+00,
-6.55076265e-01, -2.20221162e-01, 2.48574570e-01,
-5.02541482e-01, -7.11997211e-01, -1.40508279e-01,
1.91649973e+00, -1.48094499e+00, 7.51634479e-01,
1.34761691e+00, 3.20442230e-01, 9.60936487e-01,
-1.19837952e+00, -2.66969061e+00, -9.34391394e-02,
5.27702093e-01, -3.10215712e+00, 2.71029234e-01,
1.81318209e-01, -4.28294055e-02, -1.34205449e+00,
-8.57224107e-01, -1.21778703e+00, -5.36883250e-02,
-5.59774756e-01, 1.79406977e+00, -8.41634512e-01,
3.72039348e-01, -2.53998113e+00, -1.50978851e+00,
5.55885673e-01, 1.07909665e-01, 1.11268187e+00,
-1.37666953e+00, 1.17521808e-01, -2.60952115e-01,
2.51032025e-01, 5.94413280e-01, 1.37556159e+00,
6.56386435e-01, 7.19325006e-01, 3.61090809e-01,
1.31752133e+00, 1.32535398e+00, -1.08683014e+00,
-1.87055796e-01, -6.50638223e-01, -1.71927667e+00,
1.65605831e+00, 1.71931252e-01, -3.02331932e-02,
-4.15084898e-01, -9.85926807e-01, 1.16541974e-01,
-1.30422962e+00, 3.17402691e-01, 1.71063077e+00,
4.54549193e-01, -1.27560854e+00, -4.55311209e-01,
-1.72573507e+00, -8.81045103e-01, 1.04730569e-01,
-1.17039418e+00, 2.01439977e+00, 2.55889326e-01,
-5.61110713e-02, -1.44391215e+00, -2.36331511e+00,
-3.72604668e-01, 9.42972839e-01, 8.59401226e-01,
1.25858140e+00, -9.41172004e-01, -3.36117372e-02,
-1.06534338e+00, -1.13696742e+00, -1.04413354e+00,
-7.89196908e-01, 2.00451064e+00, -1.93853602e-01,
3.13512892e-01, -3.03817719e-01, -8.04859698e-01,
-1.95716560e+00, -1.03446293e+00, -1.07354790e-01,
1.18491185e+00, -4.11621422e-01, 6.83324993e-01,
6.81254804e-01, -1.74734890e-02, -1.07716583e-01,
-1.06789730e-01, 9.15462434e-01, -1.34588110e+00,
2.62658954e-01, -7.88015723e-01, -7.42494106e-01,
-5.54709375e-01, 1.10739875e+00, -1.33411026e+00,
1.41661406e+00, 3.81778121e-01, -6.90007150e-01,
-8.57861638e-01, -1.09430885e+00, -2.36567706e-01,
-2.35813904e+00, 1.05396545e+00, 3.87365222e-01,
-1.81923354e+00, 6.20441258e-01, 8.90031457e-01,
-1.56834197e+00, 2.74255037e-01, 1.44217223e-01,
-6.31276071e-01, -5.85976720e-01, 2.00079346e+00,
8.08666527e-01, 2.62213588e-01, 1.09353757e+00,
4.51944053e-01]], dtype=float32))
(u'Actor_hidden_0_256_b:0', array([ 0.07214922, -0.78546345, -1.00438392, 0.19715561, -1.48504877,
0.06085639, 0.47990698, 0.56901807, 0.83275473, -1.90649211,
1.03870821, -1.47584236, 0.72566807, 0.01081594, 0.27557841,
-0.49429631, -2.17523766, 0.53676444, -0.2180678 , 0.44604412,
1.21879053, 1.32373595, 0.02005956, 1.85795105, -1.26615715,
0.22453173, -0.30329981, -0.12747328, 0.90810901, -1.70135689,
0.84353435, -0.87940687, 0.00431902, -0.34795865, 1.4399488 ,
-1.0446744 , -0.20907804, 0.71967763, -1.26791942, -0.21812858,
2.40533853, 0.3383005 , 0.69449294, -1.30670977, -0.77633798,
-0.07685676, 1.85321629, 1.10277557, 0.9368543 , 0.49218932,
-0.63768691, -0.21799085, 0.10742061, 0.83987677, 1.32157934,
-2.18173099, -0.87835741, -0.222487 , 2.11840892, -1.00852764,
0.94938886, 0.12695195, -0.19481376, -0.35986832, 1.29220986,
0.35643941, 1.19123113, 0.20394664, 0.46481544, 0.01852932,
0.88732266, 0.58767533, 1.40355909, 0.31477895, -1.24114788,
0.69218922, 0.05155312, 0.56589603, -0.38991308, 1.79492164,
1.48644149, 1.13645566, 0.23842973, 0.47975585, -1.14313471,
0.13942076, -0.84264344, -0.10642988, 1.09958386, 1.45138097,
0.94575268, -1.70616555, 0.39221102, -0.03584361, 1.0872705 ,
-0.37829629, 0.11538161, -0.44555256, 1.49062645, 0.27866712,
-0.60631561, -0.56735408, 1.30270708, 0.06741523, 1.15089488,
-1.49868309, -1.66847312, -1.29338384, 0.47143066, 0.62860668,
-0.57687277, 1.31816578, 0.19437647, 0.31471309, 0.19436765,
0.24029084, -0.36464721, -1.92371702, -0.01002108, 0.42618018,
-0.53153521, -0.15312277, -2.19619513, 0.68469721, 1.09570742,
0.38070014, -1.13714612, 0.06565613, 0.67372793, -1.36645854,
0.73753226, 0.19567962, -1.14713776, -0.36725301, 0.65088421,
0.22714865, 0.51036429, -2.15045571, -0.0275881 , 0.09927447,
2.11685181, -0.09816113, -0.89382857, -0.45263276, -1.07821953,
0.61127269, 0.84506708, 0.56237006, 0.44608986, -1.04400241,
0.97529835, 0.0106624 , -0.23265848, 0.3933807 , 1.25996578,
-1.2225945 , 1.05040658, 0.03316668, -0.19714241, -0.57795733,
0.34427527, 0.19605583, -0.84440023, -2.00090384, 1.30366349,
-0.62255055, 0.53164619, -0.96031421, 0.86000836, -0.45682564,
2.38795567, 0.60437953, 1.66458035, -0.23324808, 0.06611484,
-0.0729653 , -0.19098383, -1.67569101, -0.75702471, 0.95057243,
0.12666622, 0.08708459, 1.3382386 , -0.87650836, 0.01990085,
0.75771427, 0.67315799, 0.70722902, 0.51008409, -0.70822579,
-0.73659015, -0.35089159, 0.73571956, -0.27015549, 0.6171031 ,
1.11198938, 0.06259083, 1.6938597 , -0.76959693, 0.41990301,
0.23620869, -0.8559379 , -0.4823043 , 1.17001283, 0.21909697,
-0.66484755, -0.95528591, 0.85026169, 1.52215219, -0.88664263,
-0.60047865, 0.22005011, -1.16632986, -2.91672182, -0.2011711 ,
-0.07345106, 1.65519881, 1.08711147, 1.24205732, -0.14575133,
-1.71870661, 0.27512389, -1.14330649, 0.59984338, -1.54440832,
2.34930682, 1.4779644 , 0.09797284, 0.93564773, 0.27105901,
-0.51122558, -0.30610386, -1.14462459, -0.7583642 , -0.28796983,
1.38812447, -0.96811396, -1.23090696, 1.5912894 , -1.09750903,
-0.01966865, 1.42085934, 0.3411766 , 0.39092228, -0.52724254,
0.40238291, 0.15843235, 0.08066022, -1.75022304, 0.57436007,
1.44835103, -0.43396157, -2.34229183, -0.64372277, 2.17161632,
0.50309098], dtype=float32))
(u'Actor_hidden_1_128_W:0', array([[-0.05539365, 0.31018883, 0.00590796, ..., 0.36605144,
0.07268371, 0.06087329],
[ 0.01831991, -0.05476255, 0.08132376, ..., 0.00248242,
-0.04855793, -0.02261225],
[-0.03339755, -0.01112246, -0.01459376, ..., 0.00144716,
-0.03286942, -0.04451962],
...,
[ 0.03937049, 0.04719676, -0.11512435, ..., -0.03109195,
0.07422983, -0.21680765],
[-0.02441513, -0.04510343, -0.11812051, ..., 0.02233244,
-0.07023379, -0.05401293],
[ 0.06101697, -0.02863118, 0.00343116, ..., -0.05506302,
-0.00358154, -0.07913682]], dtype=float32))
(u'Actor_hidden_1_128_b:0', array([ 0.41621351, -0.42427388, -0.00422856, -0.30661556, 0.20730875,
-0.33565906, 0.0327458 , -2.83684969, -1.36605275, -1.20085144,
0.51125461, -0.02750342, -0.19149749, -1.07157135, -1.23377109,
-1.43124604, -0.62777317, -0.18607704, -0.18397607, -1.25632215,
-0.19910227, 0.49894005, -1.10552323, 1.42755497, -0.44990894,
-0.87645924, 1.58189166, 1.37330508, 0.87148148, 0.78279603,
1.8252095 , -0.39326489, 0.32325417, 1.30756783, -0.98303694,
-1.708547 , -2.30173469, 0.39821398, 0.90269887, 0.74358702,
1.43085849, 0.72567976, 0.16063614, 1.14393485, -0.84297389,
-0.22155881, -1.13185596, -0.10502769, -0.84227687, 0.11755602,
-0.87416226, 1.50072241, -0.5855366 , 0.00696596, -1.09810233,
1.41646147, -0.51249141, -0.1194506 , 0.22569335, 1.50270474,
-1.73161936, 0.68849361, 1.51078892, 1.1250205 , -0.06838015,
1.72271013, -0.17295025, -0.98560554, -1.11576939, 0.76343685,
-0.7187891 , 0.7347036 , -0.07161925, 0.05494232, -1.80965519,
0.94903499, 0.48048538, -0.42937776, -1.26070547, -1.16464114,
1.17826951, 0.39140776, -2.340487 , -1.4403882 , -0.61025816,
-1.19673908, 0.32465047, 0.51488942, 0.17014113, 1.82833135,
-0.72343653, -0.3368305 , -0.24323779, -1.25199652, -0.01431947,
-1.85499215, 0.16064359, 0.36242396, -0.01920447, 1.33663535,
0.54003382, -0.01554809, 0.6696589 , -0.82424039, 1.59164667,
0.45771787, 0.10717127, -0.79415166, 0.19502279, 1.42039669,
1.42430317, 2.21474886, 1.01852524, 0.88905168, 0.24273457,
0.32504621, -0.45245841, -0.39507934, -0.79255974, -0.88800091,
-0.33243203, 0.35760346, 0.22480683, 0.69191027, 1.42765713,
0.3327601 , -0.14378934, -0.29342738], dtype=float32))
(u'Actor_output_W:0', array([[ -6.97577447e-02],
[ -1.28753796e-01],
[ -8.56437534e-02],
[ -1.13670349e-01],
[ -7.30649680e-02],
[ -5.08190282e-02],
[ 4.08321735e-04],
[ 6.30917847e-02],
[ -4.91436273e-02],
[ -6.82252645e-02],
[ 4.19773422e-02],
[ 4.76834327e-02],
[ -1.59564981e-04],
[ -9.82380807e-02],
[ -3.56705822e-02],
[ 3.61164622e-02],
[ 4.56456169e-02],
[ -9.02476460e-02],
[ -4.33173068e-02],
[ 8.50057080e-02],
[ -1.20780620e-04],
[ -2.69063041e-02],
[ -3.50558273e-02],
[ 6.90147504e-02],
[ -3.58270190e-04],
[ -3.41267228e-01],
[ -5.98354127e-05],
[ -3.80052514e-02],
[ 3.53333598e-06],
[ 1.46252532e-02],
[ 3.00613903e-02],
[ -1.76609397e-01],
[ 2.42736414e-02],
[ 9.78254825e-02],
[ -1.72467798e-01],
[ 9.08670500e-02],
[ 1.39877200e-01],
[ -3.49021284e-05],
[ -2.64384813e-04],
[ -2.81390160e-01],
[ 8.17910284e-02],
[ 1.76551361e-02],
[ 2.46100109e-02],
[ 1.18735388e-01],
[ 2.93876454e-02],
[ 1.26786560e-01],
[ -1.28468484e-01],
[ -3.70533839e-02],
[ 6.46423995e-02],
[ 1.27451971e-01],
[ -8.82922783e-02],
[ 1.31195327e-02],
[ -6.31008968e-02],
[ 1.70819840e-04],
[ 1.36794880e-01],
[ 5.67814596e-02],
[ -4.90942188e-02],
[ -1.34425402e-01],
[ 4.86825816e-02],
[ 2.11245060e-01],
[ -2.37583488e-01],
[ -8.83534253e-02],
[ 2.06036042e-04],
[ 5.36053590e-02],
[ 9.22160521e-02],
[ -5.18912449e-02],
[ -9.65843871e-02],
[ -5.60989007e-02],
[ 8.45848471e-02],
[ 2.62979828e-02],
[ 8.72070342e-02],
[ -4.93118539e-02],
[ 3.43440734e-02],
[ 1.31267942e-02],
[ -1.54403716e-01],
[ -2.46749260e-02],
[ 8.10628235e-02],
[ 2.32190907e-01],
[ -1.06252171e-01],
[ -4.77600172e-02],
[ -7.11765420e-03],
[ -2.13555563e-02],
[ 6.98528485e-03],
[ 3.14043500e-02],
[ -1.62781298e-01],
[ 6.96966350e-02],
[ -1.05134875e-01],
[ 1.47987902e-01],
[ -1.36359408e-01],
[ -7.26194158e-02],
[ -1.11592315e-01],
[ 2.86539588e-02],
[ 3.54902185e-02],
[ -5.32247908e-02],
[ -7.83200487e-02],
[ -9.84988734e-02],
[ 5.74149974e-02],
[ 7.61928111e-02],
[ -2.24248397e-05],
[ -4.04784866e-02],
[ -4.34139781e-02],
[ 6.09332062e-02],
[ 1.73147346e-04],
[ 1.62889495e-01],
[ 2.10282014e-04],
[ 1.06821597e-01],
[ 8.33736412e-05],
[ -1.05954476e-01],
[ -9.87091362e-02],
[ -1.05999531e-02],
[ -3.79648968e-03],
[ 1.45537360e-02],
[ -2.07850360e-03],
[ 1.24130456e-04],
[ 6.42290190e-02],
[ 1.32774219e-01],
[ 2.72949748e-02],
[ -2.61020381e-02],
[ 7.91526362e-02],
[ 3.14362049e-01],
[ 1.13056283e-02],
[ -6.49069250e-02],
[ 6.73613101e-02],
[ 9.70361680e-02],
[ -2.37057888e-04],
[ 8.47318247e-02],
[ -6.46101683e-02],
[ 8.02126080e-02]], dtype=float32))
(u'Actor_output_b:0', array([-1.5074873], dtype=float32))
(u'Model_encoding_0_128_W:0', array([[ 0.49135983, 2.02494955, -0.07118495, -0.27534938, 0.30721453,
0.17636266, -0.49619541, -0.08792351, 0.10178363, 0.78314883,
-0.91556346, -1.19868577, -0.19927065, -0.11085794, 0.61602956,
-0.35949484, -0.95257956, 0.39195541, -0.08059894, -0.55432796,
0.89362639, 0.64887977, -1.25068986, 0.03805911, -0.35016337,
0.52542078, -0.03747638, 0.61598259, 0.40774062, -0.28334215,
0.71563554, 0.66326678, 0.3492575 , -0.0668598 , -0.61655718,
0.83253944, 0.57401955, -0.25258547, -0.38941172, -0.55221194,
0.29106081, 0.6213333 , -0.47420859, -0.50257218, 0.19875187,
-0.11892499, -0.51087332, -0.67479306, 0.81231612, 0.27198434,
0.31672797, 0.67245597, -0.46344236, 0.96861231, 0.92614084,
1.03407919, 0.60647541, 0.45851532, 0.39213786, 0.56564456,
-0.12938334, -0.38642472, -0.82032609, 1.19363606, 0.94817114,
2.46784163, 0.41275638, -0.32228535, 0.47796771, -0.72206002,
-0.69513828, 1.90852582, 1.54587805, -0.61194736, -0.12164801,
-0.35599893, -0.77916074, -0.34515893, -1.05292928, 0.33743718,
-0.2704635 , -0.08678628, -0.67061329, 0.05448839, -0.01044307,
0.96258944, 1.2116791 , -1.05093348, -0.31048876, -0.04458882,
-0.12112316, -1.01571238, 0.33702984, 0.22812141, 0.37785721,
-0.69977874, -0.06910076, 0.91558307, -0.66318876, 0.6867215 ,
-1.33307838, 0.37420198, 0.32248434, -0.55450475, -0.59395701,
0.91960901, 0.55043966, -0.40009674, -0.82186019, -0.03930127,
0.27047688, -0.93537116, -0.32989272, -1.6692028 , -0.86406302,
0.42207715, -0.07763728, -1.26062214, -0.04499668, -0.68968737,
-0.5175795 , 0.83327585, -2.11631346, 0.37136847, -0.50874996,
-0.63587552, -0.64337575, -0.09352721],
[-0.21632023, 0.1125165 , 0.23420781, 1.83606517, -1.45025277,
-1.29272211, -0.37774491, 0.58434677, -1.15546095, 0.28359711,
-1.11189711, 0.85457045, -0.45618621, -0.77412546, -0.10723072,
0.71687603, 0.61412781, -0.69724941, -0.85425532, -1.13510406,
-0.09079495, 0.02706167, 1.76157761, -0.07366968, 0.71523565,
0.59958601, 0.05047134, -1.58071971, -0.03308054, -0.45993721,
-0.28048748, 0.24857736, 0.00699796, -0.1329392 , -0.73971921,
-0.57535619, 0.50437748, 1.75749731, 1.06307793, 1.46324718,
0.08598807, -0.91971999, 0.29750925, 1.39606547, 0.318988 ,
0.3715176 , -0.38965905, -0.91044903, -0.4318774 , -0.3383778 ,
-0.69236249, -1.05120885, 0.77660662, -0.25631592, -0.81724107,
-0.64480901, 1.18971515, 0.7911014 , 0.01660153, -0.44471833,
-1.12535787, -0.70930743, -1.02760065, -0.674559 , 1.32394922,
0.90676254, -0.612688 , 0.71717602, -0.82950389, 0.28984794,
-0.99202704, 0.31038326, 0.14484826, -0.53702027, -1.56600475,
-1.36087525, 0.63819546, 0.06602522, -0.42591518, -0.50585854,
0.86389244, 0.56902802, -0.03673161, 0.27511817, -1.08772337,
0.06532306, 0.44089395, -0.18530779, 0.24156006, -0.20814173,
0.50527835, 0.08122739, 0.78474092, -1.29352689, 0.46182624,
-1.31591129, 0.8337943 , -0.71795368, -0.03038207, 1.67671323,
0.48568043, -0.50704873, 1.53369415, 0.22075017, 0.18895218,
-0.34750399, -0.09676594, 0.25954738, 0.719872 , 0.04587094,
-0.56216592, 0.15374111, 0.09782296, 0.58309281, -1.28402305,
0.48515248, 0.71122861, -1.10889125, 0.966434 , 0.49177578,
1.02336919, -0.44606709, -0.05176596, -0.30860946, -1.74722171,
0.18725264, -1.83788133, 1.20481563]], dtype=float32))
(u'Model_encoding_0_128_b:0', array([-1.22562611, -0.33430201, 0.56660998, -0.5525707 , 0.21461278,
-0.38869458, -0.7084769 , -0.51471567, -0.19620588, 1.40862381,
2.76085377, -0.36599094, -1.69590938, 0.41390118, 1.1513598 ,
0.07737302, -0.4868412 , 0.12126729, 0.06855105, -0.45940802,
-0.25970504, 1.31789851, 0.13902006, 0.40363348, -0.98300397,
-0.05524224, -0.02177958, -0.0455171 , 1.53305733, -0.49246743,
0.01723312, 1.04680634, 0.45212916, 0.88116866, -0.56553251,
-1.24109125, 0.12304506, 0.22383091, -0.2370618 , -0.39416251,
0.16613878, 0.28619722, -0.17757654, -0.32183695, -0.7355907 ,
0.16254038, 0.31476912, -0.67230785, -0.48046842, 0.01776509,
-0.0345089 , 0.27923366, -0.87087631, -1.83303523, 0.08506585,
0.82296258, 1.04164279, -0.54111725, -1.05920422, 0.3554152 ,
-0.35237351, 0.23148172, -1.52304363, -0.82314396, -0.82826853,
-1.10440946, 0.18286546, -0.02940737, 0.15371189, 2.44524598,
-0.82687199, 0.95948046, -0.60236788, -0.75206769, 0.25964358,
-0.75476152, -0.57184225, 0.25722277, 0.01610618, 2.16282105,
-0.35516462, -0.23121892, 1.4377532 , -0.73859173, 0.34548634,
1.1461699 , -0.17399816, -1.06097472, -0.2220127 , -1.00130618,
0.1209299 , 0.16891114, 0.62066668, -0.02067512, -0.02212401,
-1.045331 , -0.18486097, 0.71227813, -1.1863023 , 1.27522814,
-0.59231478, 1.16266322, -0.11060551, 0.08405732, 0.69582731,
0.89554018, 1.15385723, 0.1005926 , -0.27613667, -0.02335725,
0.02979112, -0.83570695, -0.35366011, -1.58089709, -1.13343942,
-0.57095158, 0.01899027, -1.65908706, 1.6949755 , -0.51830995,
0.56891799, 0.74736381, -1.5402317 , 0.2252368 , -0.84600592,
0.04043945, 0.89700454, 0.06171588], dtype=float32))
(u'Model_encoding_1_128_W:0', array([[ -6.74038887e-01, -2.42632478e-01, -2.70694375e-01,
-1.36149275e+00, 5.77482907e-03, -1.04053104e+00,
-3.92826110e-01, -4.07096976e-03, -8.19363058e-01,
-1.27280581e+00, 1.38489619e-01, -9.31482613e-01,
1.12993348e+00, 1.20693147e+00, -5.84634364e-01,
-8.62289667e-01, 2.32649699e-01, 3.25781941e-01,
7.75916219e-01, -2.88255960e-01, 8.87540653e-02,
-9.49007273e-01, -2.37658229e-02, 3.37574899e-01,
-7.07585335e-01, -2.81490460e-02, 7.28739649e-02,
3.97290140e-01, -7.06052482e-01, 2.36796169e-03,
-2.28467181e-01, -1.65826213e+00, 1.66265249e-01,
-6.38918066e-03, -1.31265140e+00, 1.45530403e-01,
2.95098638e-03, -2.50877190e+00, 2.89891571e-01,
-6.64273620e-01, 1.98178923e+00, 5.19425492e-04,
4.70856577e-01, 9.01547253e-01, 1.23962246e-01,
3.15625742e-02, 1.02134213e-01, -7.97283113e-01,
9.13206756e-01, -1.02256134e-01, -4.44302947e-04,
-4.93920386e-01, 3.41730922e-01, -1.64214277e+00,
5.32873213e-01, -4.86214682e-02, 1.28787048e-02,
1.44601691e+00, -1.17424059e+00, -4.24023718e-01,
4.80913907e-01, 7.00498968e-02, 9.51930955e-02,
-1.26806036e-01, 2.71445233e-02, 3.08014810e-01,
6.79824471e-01, -1.61754596e-03, 1.46525607e-01,
-6.62060618e-01, 1.55752734e-03, 3.82252514e-01,
-5.27539313e-01, -5.47979653e-01, 1.35313082e+00,
-2.21263781e-01, -2.06067450e-02, 5.46923637e-01,
-1.02278209e+00, 1.66535258e-01, -2.16320087e-03,
3.94856870e-01, -8.32624197e-01, -7.55917430e-01,
3.65004279e-02, -2.52826869e-01, 1.58670557e+00,
9.77709949e-01, 4.27982658e-01, 7.81281531e-01,
2.79629184e-03, -1.36064231e+00, -5.27529061e-01,
5.01257360e-01, 6.09868839e-02, -5.28920650e-01,
-6.81317300e-02, 4.95825021e-04, -3.27689230e-01,
1.85540353e-03, -1.67728925e+00, 9.72414738e-04,
1.50900315e-02, 1.82245231e+00, -1.21773372e-03,
-2.47350079e-03, -2.86400318e-03, -5.90594560e-02,
-5.30899167e-01, 2.91494787e-01, 1.34134024e-01,
2.47480441e-02, 1.81875529e-03, -9.21685755e-01,
-6.85796261e-01, -5.71711138e-02, -9.09894884e-01,
-2.09849283e-01, -3.01176012e-02, 2.04622924e-01,
-5.06705081e-04, -3.02778751e-01, 3.94692868e-01,
-3.75227392e-01, 1.57832336e-02, -4.42885328e-03,
-1.22451149e-02, 2.28623264e-02]], dtype=float32))
(u'Model_encoding_1_128_b:0', array([ -1.04700732e+00, -6.88041270e-01, -1.55732608e+00,
5.89378119e-01, 3.10630491e-03, -4.43711877e-01,
1.56140089e+00, 1.69414186e+00, -6.35807589e-02,
-7.21657500e-02, -3.94345224e-01, -1.61118472e+00,
-2.09853220e+00, -1.74829650e+00, -1.63650048e+00,
-8.19590211e-01, 1.39029658e+00, -1.08105826e+00,
9.54563320e-01, -2.45660558e-01, 1.58328325e-01,
-8.75376523e-01, 2.66582966e-01, -1.87432781e-01,
7.21246660e-01, -8.40381458e-02, 1.19447672e+00,
-4.73600537e-01, 1.45873940e+00, 8.10104772e-04,
-2.05817208e-01, -1.52752376e+00, -1.10937726e+00,
-7.23525463e-03, -4.19941872e-01, -2.71613806e-01,
3.62458290e-03, 1.70241439e+00, 1.91718888e+00,
2.45949292e+00, -2.03623682e-01, 2.47402489e-03,
-3.13374639e-01, -6.79554582e-01, -8.46309721e-01,
3.22608016e-02, 8.90047431e-01, -7.44785726e-01,
-1.72862804e+00, 2.01020598e-01, -2.44903684e-01,
-1.13609660e+00, -1.55704689e+00, 1.66330171e+00,
1.32214391e+00, 2.39878923e-01, -1.12259543e+00,
1.34257102e+00, -9.98122990e-02, -1.07564473e+00,
-2.33261657e+00, 1.90955549e-01, 8.28436136e-01,
-4.20752496e-01, 5.85470438e-01, -6.93775341e-02,
-4.93193954e-01, -2.67040828e-04, -5.44483900e-01,
-2.58720726e-01, -4.89026308e-04, -1.60552517e-01,
-1.49622440e-01, 1.54853606e+00, -5.87168396e-01,
-6.35198772e-01, 2.25095987e-01, -5.94152927e-01,
1.99102557e+00, 1.50338161e+00, -8.26210380e-05,
-4.27157700e-01, -1.61091459e+00, -1.39991927e+00,
6.07062042e-01, 2.01317146e-01, 1.30773172e-01,
-7.99341738e-01, 1.38938677e+00, 7.16052949e-01,
3.36138974e-03, -2.32544374e+00, 5.17115831e-01,
4.42067355e-01, -6.00564420e-01, 9.90183115e-01,
-1.35041907e-01, 9.41010192e-04, 3.91412795e-01,
2.52685742e-03, -1.16461420e+00, -2.11624909e-04,
-1.02375209e+00, 1.72988999e+00, 1.85421528e-03,
-1.02008716e-03, -1.70282915e-03, -6.33999705e-02,
4.61248547e-01, -1.92970470e-01, -1.26391459e+00,
2.91066375e-02, 8.59046530e-04, -1.53414616e-02,
-5.30382633e-01, 8.29643488e-01, -1.87574756e+00,
-5.31976819e-01, 1.51012003e-01, -3.30351621e-01,
-1.81397959e-03, -1.44155920e+00, -1.75739333e-01,
-1.05852532e+00, -3.65820862e-02, -4.42587712e-04,
-1.21881319e-02, 2.14551277e-02], dtype=float32))
(u'Model_hidden_0_128_W:0', array([[ 0.04134899, -0.00745308, 0.19191252, ..., -0.03076985,
0.06962755, 0.02455787],
[ 0.17484324, 0.19093306, -0.00090504, ..., 0.0118733 ,
-0.02191254, 0.01320199],
[-0.03764554, -0.06488659, -0.12693322, ..., -0.0074618 ,
0.12626301, -0.07434369],
...,
[ 0.09168717, 0.00941177, 0.08883987, ..., 0.02868319,
0.1278563 , 0.0757451 ],
[ 0.05085058, -0.02898657, 0.09589181, ..., 0.0288339 ,
0.04577804, 0.062384 ],
[-0.08041579, 0.00115247, -0.08465376, ..., -0.02095235,
0.06077712, -0.13398924]], dtype=float32))
(u'Model_hidden_0_128_b:0', array([ 2.94011045, 0.48840877, 1.20935738, -0.04952197, 0.61876911,
-0.89692044, -2.19924617, -0.38059813, -0.25143304, 2.32927966,
-1.51613474, 0.17767864, 0.53061426, 0.10266392, 0.41245803,
0.95076936, -0.01063254, -0.07422983, -0.18641002, 0.42750645,
-0.58535057, -1.17651832, 0.64183885, -1.87774849, 0.2006838 ,
-0.58841062, 0.67821777, 1.11258399, 0.05622358, 0.42529657,
-0.08554213, 0.20395184, -0.28692102, -0.58176321, -1.93088222,
-0.06197373, 0.70613688, -1.41740716, -1.01538813, 0.68636763,
0.64668787, -0.60642546, 1.01247728, -0.06100448, 1.17552221,
-1.02241528, 0.575122 , -1.75160897, -0.34619603, 0.5212999 ,
-0.33406183, 0.10332689, -0.21816114, 1.57845283, 0.65752804,
-0.91256273, -0.08170488, 0.74671906, -1.70069063, -0.88528955,
1.2827822 , -1.5063051 , 0.13392296, 1.05593956, -0.34578806,
0.61251688, -1.06267202, 3.15583706, 0.30452988, -0.39726979,
-0.26331845, 1.36659849, 0.69692278, -0.86336941, 0.92846817,
0.30386725, 0.43000257, -0.48851153, 1.8062706 , 0.71929646,
0.87697315, 1.66088271, -0.26088172, 0.90015447, -0.0402176 ,
-0.63924247, 1.85995018, -0.94283044, 1.27358222, 0.93061185,
0.31474483, 1.24380469, 0.91937011, 2.78511739, 0.27723432,
0.23309611, -3.15904236, -1.42684078, -1.97282159, -0.98741335,
-1.01541901, -1.28754437, -0.53513163, 1.35619116, 1.37352538,
-0.48469797, -2.18140173, 0.41599897, -1.54598212, -1.12542593,
-0.23337622, 1.75334847, -0.32699126, 1.8831619 , 0.62134242,
-0.42180139, -0.4066692 , 1.20960999, 0.39590666, -0.98209816,
-0.15477884, 1.069754 , -0.22998421, -0.05455698, 0.17887689,
0.24888602, -2.6922524 , -1.29348612], dtype=float32))
(u'Model_output_W:0', array([[ 2.43938668e-03, 5.59420884e-02],
[ 2.25047506e-02, 6.99265674e-02],
[ 5.06469123e-02, -2.16989145e-02],
[ -4.25893348e-04, 3.53579489e-05],
[ 5.84393321e-03, 1.16009144e-02],
[ 6.32881820e-02, 1.29767090e-01],
[ -1.18366070e-01, -2.20542680e-02],
[ -2.34604580e-04, -1.57151357e-04],
[ 8.71375742e-05, -1.96916502e-04],
[ 1.80203700e-04, -1.79807648e-05],
[ 1.40943909e-02, 2.42589191e-01],
[ 2.91988108e-04, -8.20583518e-05],
[ 7.61878788e-02, -1.46540266e-03],
[ -1.35733026e-05, 2.58095642e-05],
[ -5.57361345e-04, -7.53637141e-06],
[ 1.80406234e-04, 1.96784575e-04],
[ 9.19854665e-06, -2.62969523e-04],
[ 1.72195584e-02, 5.08184731e-02],
[ -1.12305582e-01, -1.45441471e-02],
[ 1.66822283e-04, 6.87739011e-05],
[ -1.56990960e-02, 2.14757144e-01],
[ 1.46314159e-01, -3.55630182e-02],
[ -1.53432484e-04, 1.78284565e-04],
[ 5.36936820e-02, 6.68627582e-03],
[ 1.66055434e-05, 2.15135820e-04],
[ 2.71515048e-04, 6.14340301e-04],
[ -3.89422494e-05, 2.00354363e-04],
[ -3.50878894e-04, 3.18130442e-05],
[ 1.17975833e-06, 1.29534266e-04],
[ 1.27743930e-04, 3.75035015e-04],
[ 7.62348473e-02, 9.76544246e-02],
[ 6.19688595e-04, -4.33973546e-05],
[ -3.33765000e-02, -5.27138039e-02],
[ 2.78572291e-02, -9.75120366e-02],
[ 2.91663222e-03, 1.48846926e-02],
[ 3.64067638e-03, -6.82678400e-03],
[ -2.45546061e-03, -3.63216922e-02],
[ -2.28272937e-03, 3.82876955e-03],
[ -3.08104005e-04, -1.64364174e-04],
[ 5.06984885e-04, -6.68458460e-06],
[ 5.00665803e-04, 7.57608941e-05],
[ 2.32965942e-03, -1.32583633e-01],
[ 8.81642918e-04, -3.21783446e-04],
[ -1.86123997e-01, -9.44216773e-02],
[ 3.90325077e-02, 3.63103230e-03],
[ 1.23859264e-01, 1.74072664e-02],
[ -9.47408727e-04, 5.07034711e-04],
[ 8.14564526e-03, 5.28435670e-02],
[ 1.69526801e-01, -1.46105057e-02],
[ -6.09674666e-04, 4.23932433e-05],
[ 6.88266446e-05, -2.04836779e-05],
[ -5.00626105e-04, 3.09266470e-04],
[ -2.90150847e-03, -8.17328170e-02],
[ 1.96454028e-04, 3.61040962e-04],
[ 6.80968456e-04, 5.10167592e-05],
[ 1.57216743e-01, -4.29098420e-02],
[ 8.35290775e-02, -4.17324901e-03],
[ -9.30454880e-02, -2.49522049e-02],
[ 2.25731498e-03, -1.23999543e-01],
[ -1.78310758e-04, -3.03901870e-05],
[ -1.63990961e-04, 2.82760651e-04],
[ -6.85344264e-02, -3.85562591e-02],
[ 3.30738781e-04, 1.73869834e-04],
[ -3.48665693e-04, 7.29520980e-05],
[ 3.85797257e-03, 1.08272962e-01],
[ -3.35904682e-04, -1.16648211e-04],
[ -1.25270143e-01, -5.09848781e-02],
[ 1.66047795e-03, -1.61046665e-02],
[ -7.54345892e-05, 1.18037329e-04],
[ -1.22764986e-02, -3.29930224e-02],
[ -9.49943089e-04, -4.53803877e-05],
[ -3.49162117e-04, 1.60069263e-04],
[ 2.85913935e-04, -2.71881348e-04],
[ 3.20295594e-03, 4.36021984e-02],
[ -1.26550794e-01, -1.40310107e-02],
[ -1.55971274e-02, 1.48384012e-02],
[ 6.21559157e-04, -1.26169645e-04],
[ 1.98855196e-04, 4.03717204e-05],
[ 8.05129166e-05, -7.99933114e-05],
[ 6.64928986e-04, -3.53904936e-04],
[ 9.52460920e-04, 1.72379194e-04],
[ -6.42936036e-04, 1.64607962e-04],
[ 1.11896894e-03, 2.28235585e-04],
[ 2.50327837e-04, -1.57524846e-04],
[ 9.99083072e-02, 6.63149217e-03],
[ -2.08789104e-04, -1.70978659e-04],
[ 5.13488703e-06, 1.45796337e-04],
[ -1.31466389e-01, -2.27892660e-02],
[ 4.35177266e-04, 3.81944847e-05],
[ 3.05229391e-04, -1.99911156e-05],
[ 2.73365411e-04, -9.41671169e-06],
[ 5.50587662e-03, 4.43387181e-02],
[ 1.15521610e-01, -8.18602517e-02],
[ 1.62955359e-04, 2.40612397e-04],
[ -2.48767063e-03, -3.12391911e-02],
[ 7.64931901e-04, -8.77394923e-05],
[ -1.86761320e-01, -4.58077863e-02],
[ 3.01664546e-02, 4.22456339e-02],
[ 1.04119495e-01, -5.33892214e-03],
[ 1.60375595e-04, -6.70122899e-05],
[ 3.44490370e-04, -1.51264452e-04],
[ 8.32473710e-02, -9.10020024e-02],
[ -4.95950356e-02, 2.77279988e-02],
[ 8.89947638e-02, 1.93049699e-01],
[ -6.44757165e-05, 1.69104344e-04],
[ 5.60516119e-02, -2.49898084e-03],
[ -1.47032004e-03, 5.81870005e-02],
[ -1.05640665e-03, -3.17052414e-04],
[ -8.27459097e-02, 3.59309465e-02],
[ 1.75866950e-02, 1.18414298e-01],
[ -8.75813930e-06, -4.87862009e-04],
[ 1.87063735e-04, -7.66512458e-05],
[ -8.50754678e-02, -1.52024820e-01],
[ 1.74952816e-04, -4.01858502e-04],
[ 4.23618418e-04, 4.84474993e-04],
[ 1.60368072e-04, 2.68101518e-04],
[ 3.10265605e-05, 2.09181773e-04],
[ -5.96750069e-05, -1.93356856e-04],
[ 3.38303633e-02, -8.45330805e-02],
[ -1.12265363e-01, -5.32370433e-02],
[ 4.81329411e-02, -2.80063711e-02],
[ 4.29912849e-04, -2.51988404e-05],
[ -6.47091493e-03, 3.51689430e-03],
[ -2.69410620e-03, -3.79675291e-02],
[ -3.05774971e-03, -3.61146927e-02],
[ -3.34236938e-05, 2.72341014e-04],
[ 8.26384348e-04, -2.88785668e-04],
[ 4.05267179e-02, 7.65464604e-02]], dtype=float32))
(u'Model_output_b:0', array([ 0.70250142, 0.06752016], dtype=float32))
(u'Reward_encoding_0_128_W:0', array([[ 2.5925715 , 1.25417209, -1.26291072, 2.13559532, -1.71008313,
-0.73086613, -2.26742673, -1.47413111, 0.86606514, -1.25549543,
1.601524 , -1.72478485, 0.27136588, 2.33699608, -0.50039101,
2.9648869 , 1.62520564, 0.36673763, -0.73799253, 2.38690448,
-2.15540218, 2.1482439 , -0.02013778, 2.15168858, -1.28366899,
-1.77823496, 2.86371589, -1.03575015, -3.02416563, 1.91179526,
1.90555871, 2.5816958 , -1.64370668, -0.74010253, -1.84913039,
2.03911734, 2.35963535, -0.42140922, 1.33129215, 0.14507623,
-2.35177493, 0.92792273, 2.40959978, 2.0891633 , 1.40986061,
3.34300256, 1.94283569, 1.59952044, 2.31795406, 1.56604159,
0.10818011, 0.8710714 , 3.18787289, 1.52789986, 3.09617043,
1.90274012, -0.1540709 , -1.70567 , 1.66119099, 4.22109318,
1.54846942, 1.81622338, -1.39591491, 3.00529766, -1.53897858,
0.98128682, 1.62096667, 1.83414936, 2.70502496, 2.16010785,
-2.40697813, -1.98604143, -3.16975856, -1.96855569, 1.90068805,
2.62304115, -1.32634223, 2.53233933, -2.22632217, 1.15141726,
-1.28295195, 2.60403514, 3.07142067, 2.91946769, 2.0002594 ,
-2.38268399, -1.70151794, 2.41391969, 0.17436765, 2.42355895,
-0.02855963, 0.83681923, 1.06309462, 2.03629327, -0.30931363,
-2.42069697, -2.31475067, 0.4983893 , -1.89014888, 3.51974654,
0.46744192, -1.54447353, -2.25630784, -0.74816382, -2.40577126,
-0.9814328 , -0.90963966, 2.86127448, -1.63319778, 2.23488379,
2.748806 , 2.16255116, 1.82304609, 1.42134655, 2.19166851,
0.61479986, 1.77983391, -1.48165655, -2.29527354, 1.7839334 ,
-2.09698129, 3.50800323, 2.11703658, 0.26864848, -2.7061677 ,
2.59127712, 0.3949452 , -0.10404633],
[ 0.02497612, 2.03607321, 1.5314616 , 0.18856528, 0.91762394,
-1.94377255, 0.4258534 , 0.1502945 , -1.88310504, -2.57327414,
-0.62320107, -2.44506001, -2.05389857, -1.2437166 , -2.76261091,
0.78037077, -2.1091795 , -3.3455174 , -1.82638621, 0.29248336,
-0.1620992 , -0.56813431, 3.32576275, 0.50758278, -0.5962193 ,
0.38917756, 0.35429448, 1.57817101, -0.64520919, -0.77899939,
1.38038206, -0.33662939, -0.6122281 , 2.4006412 , 1.5236572 ,
0.30089423, -0.56876022, -1.63111091, 1.97148955, 1.59371889,
0.20409633, 1.73359275, 2.04132771, -2.14762926, -0.46807739,
0.16695313, -0.95788449, -0.8182081 , -1.13116527, -0.36138344,
-2.58111453, -2.04943347, -0.46909854, 2.83260584, -0.49712887,
-0.77164567, 2.96313548, -0.10525791, -1.57250118, -0.79337305,
1.63283813, 1.86312926, -0.45089582, -0.8467229 , -1.10746789,
-0.71921527, -1.52906907, -0.2855494 , 0.24281535, 1.64793825,
0.91479862, 1.98685539, 1.37949729, 0.87017977, -0.67403573,
0.11254553, -1.40369666, 1.23851442, 2.33857656, 0.81937772,
-1.16069937, -0.64599925, 1.15474403, 0.48577413, 0.88328451,
0.22059409, 1.66202581, 1.39403403, -2.07324886, 0.54862881,
2.29776764, 2.03299522, -3.15009379, -0.4524388 , 0.80189514,
-1.15655386, 0.75667381, 2.0122931 , 1.0037626 , 0.1207225 ,
3.02643228, -3.57754064, 1.03528857, -2.93485045, -0.01440706,
1.71545529, 1.68910813, -0.41797295, -1.67322934, -1.40101683,
0.42059919, -0.80745572, 1.02882624, 0.73659497, -0.89047903,
2.49539876, -2.99796247, 0.05994392, 0.67272764, -0.82278854,
0.45440042, -0.54082972, -0.25799248, -2.23502922, 0.87125587,
1.66745794, 2.1647954 , 2.25947809]], dtype=float32))
(u'Reward_encoding_0_128_b:0', array([-0.96865088, -0.94214916, 0.72433484, -0.88120192, 0.88101649,
1.41014421, 1.03241634, 0.69384986, 0.11696249, 2.0091908 ,
-0.437576 , 1.55131328, 0.92930162, -0.54778117, 1.77904975,
-1.42272246, 0.1350178 , 1.69373262, 0.93464112, -1.0553925 ,
1.08413136, -0.36057922, -0.2688444 , -0.95787781, 0.8627308 ,
0.75436062, -1.26439893, 0.58768952, 1.91256511, -0.52541548,
-1.09062552, -0.9279058 , 1.38239551, 0.52701181, 0.68503076,
-0.91527313, -0.75198466, 0.73702061, -1.09431183, -0.14061858,
1.18979537, -0.64636642, -1.69840896, -0.0527797 , -0.17902611,
-1.3533051 , 0.01562131, -0.46203947, -0.53930801, -0.51386601,
0.74719924, -0.13414717, -1.12633514, -1.20644355, -0.69137406,
-0.50292444, -0.98633581, 0.86323023, -0.09097352, -1.42593861,
-0.98045987, -1.15958905, 0.8630178 , -0.91646588, 1.02707601,
0.36428317, -0.093058 , -0.64619309, -0.90231776, -1.45714784,
1.56196547, 1.37256563, 1.3342644 , 0.90840971, -0.53832078,
-1.0378387 , 1.62434864, -1.31567526, 0.83767378, -0.68889123,
0.937675 , -0.82967728, -1.57376778, -1.34173834, -1.03628552,
1.17479491, 0.84152639, -1.33457923, 0.65665078, -1.14802575,
-0.04882829, -0.55851394, 0.70935822, -0.66723561, -0.26025942,
1.6728996 , 1.02916193, -0.32531077, 0.79830086, -1.40541923,
-0.60898817, 1.82987094, 0.9916361 , 2.14866924, 1.2095325 ,
0.30391014, 0.8101458 , -1.01301241, 1.69582486, -0.39743313,
-0.7658025 , -0.60973853, -0.99169183, -1.55712497, -0.57767808,
-0.46843076, 0.42338297, 0.70697761, 1.02828717, -0.46055272,
0.97452474, -1.22998607, -0.77008367, 0.6896261 , 2.29085374,
-1.48301768, -0.4496088 , -0.75577509], dtype=float32))
(u'Reward_encoding_1_128_W:0', array([[ 4.76499140e-01, -2.20796287e-01, -5.92657402e-02,
1.15707986e-01, 1.33083987e+00, 4.72625405e-01,
-6.42385900e-01, 2.59454536e+00, 5.37650764e-01,
1.24659991e+00, -1.64825916e-01, 1.32427490e+00,
-9.07496154e-01, -7.57403970e-01, 1.54092740e-02,
3.64925474e-01, 1.14826310e+00, 4.60522100e-02,
2.77157694e-01, -1.33809209e+00, -2.53070265e-01,
5.84043741e-01, -1.01582563e+00, 1.78370457e-02,
-6.03367448e-01, 5.62290072e-01, 1.90181509e-01,
4.71322417e-01, -7.93119371e-01, 5.80193941e-03,
-8.30242276e-01, -3.29007432e-02, -8.40659976e-01,
-8.10154527e-02, -1.68154645e+00, -3.21893722e-01,
1.11055303e+00, -4.46316367e-03, 9.99986708e-01,
2.55292118e-01, -1.99664259e+00, 1.86688280e+00,
-4.34519276e-02, -8.31706524e-01, -2.81001516e-02,
-1.47625832e-02, 1.07165301e+00, -3.56161380e+00,
8.99102271e-01, 8.61781120e-01, -6.52338088e-01,
-6.22219801e-01, 1.10518456e+00, 4.98555034e-01,
2.19628006e-01, 1.98764622e-01, -1.17907357e+00,
6.50045931e-01, 1.03201091e+00, 4.09647495e-01,
-3.34217906e-01, 4.66810673e-01, -8.71326864e-01,
-4.56517283e-03, 3.57574858e-02, 3.15211326e-01,
5.65982819e-01, -6.02874339e-01, -4.60490817e-03,
-1.56810209e-02, 9.65321004e-01, -5.29495589e-02,
7.73043454e-01, -1.01661766e-02, 1.16263878e+00,
1.65993437e-01, 5.48864782e-01, -3.44154648e-02,
9.09340978e-01, 1.18474114e+00, 1.74554324e+00,
-8.02219093e-01, -3.19400616e-02, -1.52818716e+00,
1.58570981e+00, 2.35215902e-01, -6.07176483e-01,
6.49720132e-01, -1.00122571e+00, 8.50680709e-01,
2.23756745e-01, -9.89084169e-02, 1.40152442e+00,
-2.02584942e-03, 4.25671399e-01, -8.74954015e-02,
-1.15120697e+00, -1.21545267e+00, -8.71081829e-01,
-9.50884819e-01, 7.77656198e-01, -8.08137730e-02,
4.27433848e-01, 1.52153850e+00, 1.09514451e+00,
-1.98025715e+00, -1.52405620e+00, 1.48977923e+00,
6.11936040e-02, -3.40559661e-01, 7.92392075e-01,
1.50923932e+00, -1.15533161e+00, -1.17920232e+00,
2.25546694e+00, 1.29590833e+00, -3.34109247e-01,
-1.36019080e-03, 3.16900164e-01, -1.22074023e-01,
-1.77369177e+00, 2.17186761e+00, -2.88195219e-02,
6.43402219e-01, 1.24598157e+00, -9.93886530e-01,
-8.51455867e-01, 4.61170524e-02]], dtype=float32))
(u'Reward_encoding_1_128_b:0', array([ -4.58243072e-01, 2.51842111e-01, -2.56288853e-02,
-1.28969371e+00, -1.18382692e+00, -4.36541617e-01,
7.71907568e-01, -1.77981913e+00, -5.30279338e-01,
-2.21542120e+00, 1.67489886e-01, -1.18200481e+00,
9.70692158e-01, 8.10533285e-01, -2.32029427e-02,
-1.78384170e-01, -6.37198210e-01, -3.13655287e-02,
-2.70991236e-01, 1.32792985e+00, 2.57118791e-01,
-4.95196879e-01, 1.01490629e+00, -2.80046314e-02,
5.87987661e-01, -4.92751241e-01, -1.44111082e-01,
-4.40921962e-01, 7.96014965e-01, 2.03833673e-02,
8.70526135e-01, -7.23793358e-03, 8.66948128e-01,
3.86499353e-02, 1.65978730e+00, 2.62024224e-01,
-1.03353405e+00, 2.54650717e-03, -8.22923422e-01,
5.14212623e-02, 1.25511932e+00, -1.18086839e+00,
1.76622290e-02, 8.32203209e-01, 1.11220917e-02,
1.45268934e-02, -9.55925047e-01, 1.91847086e+00,
-6.93966091e-01, -6.57750785e-01, 7.05223024e-01,
6.82390332e-01, -9.63781953e-01, -5.03551006e-01,
-2.27872103e-01, -2.05681682e-01, 1.17101562e+00,
-5.15501678e-01, -9.25370693e-01, -3.54816437e-01,
2.08925173e-01, -3.80515277e-01, 9.82987225e-01,
-3.59801054e-02, -3.37884985e-02, -3.15766782e-01,
-5.67766547e-01, 6.29731238e-01, -4.59050126e-02,
6.40230775e-02, -8.33289206e-01, 8.87009129e-03,
-4.57916707e-01, -7.90927035e-04, -1.14628303e+00,
-1.73917115e-01, -3.05330604e-01, 7.65495002e-02,
-8.95828605e-01, 6.57428980e-01, -1.55417395e+00,
8.17589700e-01, -4.80616372e-03, 1.49933553e+00,
-1.45443594e+00, -2.41397575e-01, 3.33136916e-01,
-5.64145446e-01, 9.96778369e-01, -7.29937255e-01,
-1.60592318e-01, -1.19016366e-02, -1.23587286e+00,
-1.32581079e-02, -4.25043941e-01, 1.12055242e-01,
1.14641201e+00, 1.21384943e+00, 1.06668723e+00,
1.21994019e+00, -7.34939396e-01, -1.54507207e-02,
-2.66214788e-01, -1.28366661e+00, -9.72650886e-01,
1.70621598e+00, 1.45167220e+00, -1.01175821e+00,
-1.91738196e-02, 3.33783954e-01, -6.07802391e-01,
-1.09877813e+00, 1.14534688e+00, 1.16847205e+00,
-1.52562237e+00, -1.12734687e+00, 3.47897589e-01,
-3.97057794e-02, -2.64860541e-01, 1.11990757e-01,
1.75277734e+00, -1.82037735e+00, -1.31065296e-02,
-8.23405385e-01, -1.10197222e+00, 1.02221799e+00,
1.09179115e+00, 4.23725061e-02], dtype=float32))
(u'Reward_hidden_0_128_W:0', array([[ 0.83349037, 1.21296501, 0.00448853, ..., 0.60700268,
1.33233881, -0.72853488],
[-0.94406837, -1.05251074, 0.23272924, ..., -1.81358683,
0.64362174, 0.21876411],
[-0.75013733, -1.33329594, 0.41544521, ..., -1.52459323,
-0.99371088, 0.70020485],
...,
[-0.04463024, -1.58162189, 0.18725665, ..., -1.17390537,
-2.04530907, -0.02644207],
[-0.9411692 , -0.77221578, 0.13814911, ..., -0.49725607,
-0.90558648, 0.09993076],
[-0.24327403, -0.53596449, -0.14710134, ..., 0.25672656,
-0.29638532, -0.15569364]], dtype=float32))
(u'Reward_hidden_0_128_b:0', array([ 0.53109473, 1.27395785, -0.44147992, -0.19489935, 0.85341161,
0.22593337, -2.55004549, -1.053545 , -0.57657647, 0.26782432,
-0.45197284, 0.20966217, 0.48517573, -0.66428554, 0.04668872,
1.47446108, 1.29936957, -0.95409757, 0.17366379, -0.81762749,
1.13768721, 0.36996326, 0.21051137, 1.88902354, -0.50809336,
0.30507299, 0.79642922, 1.08679497, 0.13565618, 0.79041481,
-1.87669086, 1.45954871, 1.54843187, 0.15835157, -0.17223334,
-0.02407236, 0.08191435, 0.39530134, -0.21003407, 0.48097575,
-1.38339186, 0.50607795, -1.56209612, -0.32403526, 0.34987643,
0.4588398 , 0.88887703, 1.26925516, -1.00344837, -0.4520852 ,
-0.59809333, 0.81907117, -1.01680505, -0.88006854, 1.45424867,
-0.62075526, -2.53952122, 1.58328462, -1.32298636, -0.76918042,
0.87664121, -0.25924006, -0.19229053, 0.44402516, 1.27318716,
0.53509444, -1.98459971, 0.55147839, 0.59365523, -0.84507781,
-1.65072918, -0.67001617, 0.24511741, 1.151335 , -0.66665804,
2.05744648, 0.46976298, 0.37840071, 1.76448905, -2.23876739,
-1.86933517, -2.39598942, -1.48273695, -0.41978636, -0.6395117 ,
-0.29402915, 0.59695673, -0.88752192, 1.21187508, 1.11320829,
0.78605491, 1.24412513, -0.83418477, 0.20704107, 0.16547216,
-0.14150801, -1.40515149, -1.81337595, -0.94795465, 0.16856134,
-1.69233537, -0.97976893, 0.02120675, -1.24542999, -0.97938287,
0.75847763, 1.61859965, 0.10126536, 0.29146612, 0.34719744,
1.3495791 , 0.52708435, 1.15654457, 1.39904749, 1.02346241,
-1.07310879, -0.682863 , -0.21261062, 0.45747 , -0.61179149,
0.06364289, -0.71747661, -0.63275796, 0.48339888, 1.09126174,
0.2991468 , -0.59222394, -0.00430499], dtype=float32))
(u'Reward_output_W:0', array([[ 2.58039761],
[-2.09393883],
[-0.22107202],
[-2.62154412],
[-0.81057435],
[ 5.40187168],
[-2.52775955],
[ 1.26408172],
[ 1.51062751],
[-0.2275949 ],
[-2.92523336],
[ 0.81043476],
[-5.91224909],
[-3.27910423],
[ 1.7780056 ],
[-0.13048044],
[ 1.68519902],
[-6.14645529],
[ 2.41417789],
[ 1.37945807],
[ 2.94042277],
[ 3.53901076],
[-0.43131435],
[ 0.23009761],
[-2.31188965],
[ 0.23374134],
[-1.10170197],
[ 1.97306156],
[ 1.87295067],
[ 1.74603128],
[-0.7139923 ],
[ 3.55514574],
[ 6.45685816],
[ 1.88998842],
[ 1.73251212],
[-0.52584445],
[ 1.91720748],
[-4.62714481],
[-0.34051469],
[-3.77927375],
[-0.36231095],
[-9.90133762],
[-0.08712806],
[-0.69359946],
[ 2.91437721],
[-1.44189525],
[-0.34605822],
[-0.35763028],
[-1.27317631],
[ 0.98678094],
[ 1.16916788],
[ 1.92442763],
[ 1.76520586],
[ 1.76317167],
[-0.50724256],
[ 1.25427091],
[ 4.17573071],
[ 1.40794384],
[-5.04222775],
[ 2.46109891],
[ 3.44191289],
[ 1.60664964],
[-2.95860052],
[-0.81211954],
[ 2.52723384],
[ 0.92661214],
[ 1.31705165],
[-4.8199873 ],
[ 2.11865544],
[-0.76884735],
[-1.22868407],
[-3.22862983],
[-6.01477194],
[ 1.73178911],
[ 1.71447945],
[ 1.0948422 ],
[ 1.78660893],
[ 0.92327875],
[-1.3133651 ],
[-3.1142261 ],
[-1.49831867],
[-0.32971111],
[-1.35873163],
[-2.32107878],
[ 0.93830973],
[ 2.79281139],
[-1.62387419],
[-0.34731704],
[ 1.12364995],
[-5.87035179],
[ 0.98888129],
[ 1.68647969],
[ 2.20077085],
[ 3.10455322],
[-0.84159404],
[-0.64523643],
[ 6.4389267 ],
[ 0.7573837 ],
[-5.71666718],
[ 1.76092994],
[ 1.1271162 ],
[-0.33683059],
[-0.47232112],
[-0.23483577],
[-1.83075368],
[ 3.23180628],
[ 1.81414735],
[ 1.70084441],
[ 2.9925909 ],
[-0.43085778],
[-0.36817575],
[-1.48089552],
[ 0.99976552],
[-2.04348421],
[-2.79764915],
[ 1.34251904],
[-2.8227613 ],
[-0.42931676],
[ 1.58135033],
[-2.12122178],
[ 0.22943448],
[-2.66062498],
[-2.12325072],
[ 1.24527967],
[ 1.61775446],
[-1.99071169],
[-6.56041765],
[-0.74085009]], dtype=float32))
(u'Reward_output_b:0', array([-1.30134094], dtype=float32))
(u'Value_hidden_0_256_W:0', array([[ 1.58855450e+00, -4.31995535e+00, 1.62565875e+00,
1.61038792e+00, 5.82926631e-01, 2.15514889e+01,
-5.04894400e+00, -8.83786011e+00, 1.52040923e+00,
5.96569955e-01, -4.33716822e+00, 6.42487764e-01,
1.58227074e+00, -5.95761013e+00, -7.78372955e+00,
7.57033682e+00, -3.90789104e+00, -5.26107407e+00,
1.59770036e+00, 5.35800219e-01, 7.18689442e+00,
-1.37224770e+00, 2.91904539e-01, 1.59479828e+01,
1.68982458e+00, 1.45033627e+01, 3.22896051e+00,
-4.56743526e+00, -9.54135227e+00, 3.07688785e+00,
1.04872837e+01, 1.74967899e+01, 1.60816169e+00,
1.55734987e+01, 1.84725990e+01, -7.47812939e+00,
1.60457659e+00, -8.24539566e+00, -4.51991367e+00,
-8.10076809e+00, -3.96850324e+00, -9.61819935e+00,
-1.21516571e+01, -5.63216066e+00, 1.52078462e+00,
1.58256662e+00, 1.54521122e+01, 1.54699886e+00,
-9.51437569e+00, -4.15675449e+00, 1.92958653e+00,
1.54420292e+00, -7.58803368e+00, -1.12632875e+01,
6.52852535e-01, 1.58918190e+00, -8.44942570e+00,
-3.88802361e+00, -7.10462189e+00, 6.35077238e+00,
1.18670082e+01, 6.63772762e-01, -9.36955929e+00,
-5.46086359e+00, -1.96305656e+00, -2.25835013e+00,
-7.11876202e+00, 1.91773930e+01, 1.57413054e+01,
1.27666559e+01, -7.53592682e+00, -7.67729378e+00,
-1.57136548e+00, 4.36435789e-01, 1.53513491e+00,
1.62366152e+00, -6.13782358e+00, 1.60135531e+00,
-4.77598935e-01, -5.63575220e+00, -8.01776981e+00,
5.79482436e-01, 1.61417675e+00, -5.87629616e-01,
-7.35989952e+00, 1.13654652e+01, -7.46784258e+00,
1.57638931e+00, -3.57466435e+00, 1.02233493e+00,
-6.41943502e+00, 2.09273791e+00, -6.52351236e+00,
1.50494826e+00, 8.56918246e-02, 1.63634014e+00,
1.60724688e+00, 8.35537255e-01, -5.46696234e+00,
3.48573804e+00, 6.52499080e-01, -5.08095789e+00,
1.57052815e+00, 9.29087043e-01, -4.66365767e+00,
-7.51616240e-01, -9.44585991e+00, -8.03517342e+00,
2.11281509e+01, 3.34725499e-01, -6.31357431e+00,
1.03965015e+01, -7.15171289e+00, 5.80116808e-01,
1.55601561e+00, 1.69726408e+00, 1.55673885e+00,
1.24038992e+01, 1.61313725e+00, -6.90878105e+00,
-1.20549707e+01, -1.17716064e+01, -8.22774982e+00,
-7.82575035e+00, -4.38699961e+00, -4.56822157e+00,
-4.54017496e+00, -6.40132427e+00, -1.40845194e+01,
-4.78392422e-01, -6.94685078e+00, 1.32337141e+01,
-6.97252178e+00, -4.30403471e+00, -7.30491114e+00,
-2.00038815e+00, 4.63165402e-01, 1.50174308e+00,
-3.96858764e+00, -4.56358719e+00, -8.52467346e+00,
1.58378053e+00, 2.81396359e-01, 6.04772389e-01,
-4.06528187e+00, -4.34716129e+00, 2.12942886e+01,
1.56881523e+00, 1.59353006e+00, -5.47932577e+00,
6.34840310e-01, 1.06157579e+01, -4.68873692e+00,
5.94484389e-01, -6.02791452e+00, -5.09079027e+00,
-6.88960314e+00, -6.82970285e+00, 1.58436716e+00,
1.54521644e+00, 1.59678066e+00, 2.00998116e+01,
-5.05495882e+00, 6.50235116e-01, 1.06944885e+01,
1.27194538e+01, 5.47508538e-01, 1.57479119e+00,
1.80495663e+01, 2.11008663e+01, 1.26314468e+01,
-5.30979729e+00, -5.14125872e+00, 8.61612678e-01,
-7.76932383e+00, -5.10980511e+00, 1.75093441e+01,
-6.17395258e+00, -4.84902477e+00, -6.44792557e+00,
-9.14117813e+00, -6.56304169e+00, -3.76356292e+00,
1.66115558e+00, 4.60527152e-01, -4.45146227e+00,
1.50267854e-02, 3.32092106e-01, 1.28787339e+00,
1.53333163e+00, 1.64174354e+00, 1.59147060e+00,
1.12016191e+01, 1.54059684e+00, -5.14140081e+00,
-9.39861393e+00, 3.42174697e+00, 1.58446991e+00,
2.05528297e+01, -8.96297395e-01, -1.50519073e-01,
1.48532724e+00, 6.29266858e-01, 1.52543652e+00,
-6.26482677e+00, 9.83798790e+00, -1.38165474e+01,
-6.68937969e+00, 8.84234667e-01, 6.82043493e-01,
-1.71237671e+00, -6.44854212e+00, 5.90524316e-01,
1.59155452e+00, -6.28804398e+00, -5.19480276e+00,
-5.51800203e+00, -2.24002552e+00, 1.79862154e+00,
6.03401542e-01, -6.29032707e+00, 1.52731931e+00,
-6.05302954e+00, 1.59514129e+00, -3.56139708e+00,
-9.78015614e+00, 4.72735596e+00, 8.03247166e+00,
-3.80049992e+00, -4.60168982e+00, 7.81006932e-01,
-6.84611320e+00, 1.54123843e+00, -3.60955930e+00,
1.49527800e+00, 5.86860836e-01, -4.36876267e-02,
-4.04340744e+00, -4.89320368e-01, -9.23300362e+00,
2.12548714e+01, 2.82948828e+00, 5.92207789e-01,
-9.61939716e+00, -4.98700380e+00, 5.16022682e-01,
1.24706459e+01, 9.54657614e-01, -6.47740889e+00,
1.55436206e+00, 1.44689465e+01, 1.51932251e+00,
1.89697304e+01, -5.32397516e-02, -5.80393362e+00,
-5.61003113e+00],
[ -2.53003192e+00, 8.85561275e+00, -2.39549851e+00,
-2.32846856e+00, 1.09409447e+01, 7.41309309e+00,
7.30201340e+00, 7.18071938e+00, -2.21604347e+00,
-6.98181772e+00, 4.77425528e+00, -7.21540022e+00,
-2.31905389e+00, 6.65398264e+00, 8.36586094e+00,
9.60550308e+00, -3.66319351e+01, 8.78736496e+00,
-2.35704470e+00, -8.14212608e+00, 8.39083576e+00,
-1.09551525e+01, -9.57750511e+00, 1.00704412e+01,
1.04420662e+01, 1.01029634e+01, 9.23109722e+00,
8.94228363e+00, 2.39754367e+00, 1.00816164e+01,
1.03040781e+01, 1.07663774e+01, -2.39134359e+00,
1.03347092e+01, 1.03375998e+01, -2.89300976e+01,
-2.33974099e+00, -2.04396935e+01, 8.83079815e+00,
4.88398361e+00, 9.15108204e+00, -1.64957695e+01,
-1.55488987e+01, 7.86191130e+00, -2.88494802e+00,
-2.33244658e+00, 1.02982197e+01, -2.28325081e+00,
-1.67983265e+01, 8.41846085e+00, 9.44842148e+00,
-2.25091434e+00, 8.17637539e+00, 1.64591360e+00,
-6.84388351e+00, -2.31938553e+00, -2.63993225e+01,
7.42392445e+00, -3.15354404e+01, 9.53548336e+00,
1.03751183e+01, -7.44154978e+00, -6.98537409e-01,
8.05226040e+00, -8.32350445e+00, -1.97176266e+01,
8.05860710e+00, 9.62928104e+00, 1.05513630e+01,
1.03376589e+01, 6.82469273e+00, 7.69945574e+00,
-7.75090504e+00, 9.55719090e+00, -2.23841190e+00,
9.26860809e+00, 8.42105103e+00, -2.46904516e+00,
-3.14542160e+01, 8.00324535e+00, 6.79045248e+00,
-7.17443466e+00, -2.44389176e+00, -7.26350021e+00,
-3.05066242e+01, 1.01345358e+01, 7.71096945e+00,
-2.37017798e+00, 8.99473667e+00, -6.10434341e+00,
7.34316015e+00, 1.11973896e+01, 8.56594372e+00,
-2.20067358e+00, -1.41590061e+01, -2.56962395e+00,
-2.88362384e+00, -6.91472006e+00, 7.70980310e+00,
9.91966057e+00, -6.70511961e+00, -3.50559731e+01,
-2.32102513e+00, -6.44295263e+00, 8.10798454e+00,
-7.31347799e+00, -1.14895105e+00, 7.69565916e+00,
6.15086603e+00, -9.20790195e+00, 8.34171963e+00,
1.09311686e+01, -1.19396791e+01, -7.24750948e+00,
-2.27129745e+00, -2.86143541e+00, -2.28935766e+00,
1.07053108e+01, -2.40686941e+00, 6.59183979e+00,
-1.61688862e+01, 1.42805055e-01, 6.17059040e+00,
-2.49338913e+01, 7.38852882e+00, 8.26123142e+00,
9.61674786e+00, 7.67311192e+00, -1.94052811e+01,
-7.74631643e+00, 7.80756235e+00, 1.06335449e+01,
7.14223528e+00, 7.92863894e+00, 7.79319096e+00,
-1.52780609e+01, -7.01078892e+00, 1.13325663e+01,
8.69751263e+00, 7.16693926e+00, -2.37708092e+01,
-2.32609463e+00, -9.67922306e+00, -7.03246212e+00,
9.29314518e+00, 7.73265886e+00, 6.49411869e+00,
-2.32041025e+00, -2.56842732e+00, 7.75564766e+00,
-6.82216740e+00, 1.10065174e+01, 7.89880705e+00,
-6.76334906e+00, 8.70404911e+00, 8.09298611e+00,
8.08876514e+00, 6.81925583e+00, -2.54457760e+00,
-2.26071692e+00, -2.37655854e+00, 9.41113758e+00,
7.31008148e+00, -6.98374557e+00, 9.68274784e+00,
1.00587654e+01, -6.85194635e+00, -2.32674932e+00,
9.65944958e+00, 8.53087616e+00, 1.09403334e+01,
8.41519833e+00, 8.50492954e+00, -6.85334587e+00,
7.89263964e+00, 8.02979469e+00, 9.61525917e+00,
7.89022017e+00, -3.69611282e+01, 8.45080471e+00,
3.76047254e+00, 8.80344868e+00, -3.71863708e+01,
-2.52593470e+00, -7.76544523e+00, 7.15636253e+00,
9.77169800e+00, 1.00979824e+01, -4.96316195e+00,
-2.24925590e+00, -2.42273283e+00, -2.32981348e+00,
1.09897442e+01, -2.23781013e+00, 9.09359264e+00,
-2.26223543e-01, 1.09290667e+01, -2.33900023e+00,
8.92096996e+00, 1.02710476e+01, -1.13413572e+01,
-2.19393063e+00, -8.19469547e+00, -2.24113488e+00,
6.97500467e+00, 9.79874229e+00, -2.01859245e+01,
-3.11669159e+01, 1.06956215e+01, -7.09741020e+00,
1.03773127e+01, 6.20991325e+00, -6.38596296e+00,
-2.44569826e+00, 7.98239946e+00, 8.06593895e+00,
8.08996773e+00, -1.06567097e+01, -2.82093048e+00,
-6.82429457e+00, 8.87247944e+00, -2.21632695e+00,
7.37717676e+00, -2.33633327e+00, 8.73017883e+00,
-2.54469061e+00, 9.08958340e+00, 1.00473843e+01,
8.08142662e+00, 7.81882954e+00, -6.71199751e+00,
8.37143707e+00, -2.28332496e+00, 8.59674549e+00,
-2.19673800e+00, -7.39313078e+00, 1.09586802e+01,
-9.93175888e+00, -8.80949688e+00, 3.22494650e+00,
7.31202364e+00, 9.86805248e+00, -7.79269314e+00,
-1.30298290e+01, 8.35633659e+00, -7.67892790e+00,
1.07105618e+01, -6.09063196e+00, 8.02931309e+00,
-2.35943580e+00, 1.05868444e+01, -2.23296118e+00,
9.23864365e+00, 9.58707714e+00, 7.44452095e+00,
-1.14711943e+01]], dtype=float32))
(u'Value_hidden_0_256_b:0', array([ -6.46993589e+00, 6.09683084e+00, -9.81201935e+00,
-1.20631304e+01, 5.10508919e+00, 1.52809703e+00,
7.39436388e+00, 4.20929861e+00, -1.12523947e+01,
2.69452405e+00, 1.58330650e+01, 2.92247319e+00,
-1.06632109e+01, 6.14385462e+00, 4.88588810e+00,
1.54388130e+00, 4.10624981e+00, 1.41374159e+00,
-9.85144329e+00, 3.44745803e+00, 4.56238937e+00,
5.14863777e+00, 4.18381882e+00, 2.80656457e+00,
1.99322093e+00, 3.07957006e+00, 3.97121763e+00,
4.27165508e+00, -5.44006526e-01, 3.33263922e+00,
2.48777461e+00, 2.26132298e+00, -9.23217869e+00,
3.05101466e+00, 3.04331446e+00, 2.28277588e+00,
-1.08950624e+01, 1.32179713e+00, 3.50235152e+00,
6.78618002e+00, 4.53722525e+00, 9.39071083e+00,
1.07431622e+01, 6.94016314e+00, -3.60793567e+00,
-9.99567604e+00, 2.93978000e+00, -9.98418522e+00,
1.14638412e+00, 3.00926566e+00, 3.37240744e+00,
-1.11792936e+01, 3.92959642e+00, 8.94151255e-02,
2.67251849e+00, -1.13576822e+01, 1.75200367e+00,
8.30812454e+00, 3.42434740e+00, 5.01039839e+00,
3.30549884e+00, 3.10793900e+00, -3.75198796e-02,
4.53726673e+00, 3.76743007e+00, 9.45179558e+00,
6.25736046e+00, 2.69455457e+00, 2.91050267e+00,
2.97439075e+00, 6.21127415e+00, 4.10361004e+00,
3.36644459e+00, 1.41982222e+00, -1.04660206e+01,
5.15800953e+00, 3.04828525e+00, -7.57705688e+00,
1.29447794e+01, 4.07517624e+00, 7.63801765e+00,
2.79181170e+00, -8.20702457e+00, 3.02726054e+00,
2.29492140e+00, 3.41110611e+00, 2.64843249e+00,
-8.68097591e+00, 3.47253680e+00, 2.88636136e+00,
4.89908266e+00, 3.20582271e+00, 4.53471422e+00,
-1.14519463e+01, 6.35197735e+00, -6.75388288e+00,
-4.23381090e+00, 3.00166130e+00, 5.80706978e+00,
4.16706753e+00, 2.61737156e+00, 3.18638134e+00,
-9.88213158e+00, 2.71350169e+00, 5.61807537e+00,
3.06802464e+00, 3.60509893e-03, 3.24506783e+00,
1.40661335e+00, 4.01871872e+00, 4.20545197e+00,
2.39045691e+00, 6.89080715e+00, 2.85443997e+00,
-1.22361984e+01, -5.04239798e+00, -1.09591942e+01,
2.85177827e+00, -8.99491215e+00, 6.67115259e+00,
1.07297144e+01, 4.28836383e-02, 5.22877216e+00,
3.09228182e+00, 7.95855761e+00, 3.49869156e+00,
3.98066139e+00, 6.19654608e+00, 1.22909288e+01,
3.29468155e+00, 6.84247553e-01, 3.59504700e+00,
6.01678181e+00, 8.41423512e+00, 5.38693237e+00,
7.39986610e+00, 2.71949458e+00, 3.14329648e+00,
4.21695375e+00, 8.46883011e+00, 2.43896151e+00,
-9.90820026e+00, 4.25058222e+00, 2.73047137e+00,
4.09942818e+00, 6.69231415e+00, 1.44775426e+00,
-9.69124699e+00, -6.21628523e+00, 6.28737402e+00,
2.80976367e+00, 2.36737466e+00, 4.73107767e+00,
2.61456966e+00, 3.51744080e+00, 7.51973486e+00,
3.78149986e+00, 7.47525644e+00, -6.36659050e+00,
-1.05121737e+01, -9.10657883e+00, 2.27350068e+00,
6.72624016e+00, 2.88084459e+00, 3.01717377e+00,
3.13672090e+00, 2.67095685e+00, -9.80922127e+00,
2.64621139e+00, 1.75292087e+00, 2.75703573e+00,
7.06741142e+00, 3.84294510e+00, 3.56369376e+00,
5.82183933e+00, 3.67812371e+00, 2.91328883e+00,
7.29147625e+00, 3.54948926e+00, 5.01576233e+00,
-4.38778073e-01, 4.35950089e+00, 3.85181022e+00,
-7.93558645e+00, 3.12956405e+00, 8.67310524e+00,
1.57258487e+00, 4.14394855e+00, 3.15832734e+00,
-1.16732435e+01, -9.49934387e+00, -1.05555696e+01,
2.47350860e+00, -1.08378506e+01, 3.22470808e+00,
2.07776390e-02, 1.94810867e+00, -9.48894978e+00,
2.15385199e+00, 2.86280060e+00, 5.91880560e+00,
-1.16716738e+01, 3.50796676e+00, -1.04793053e+01,
5.85563993e+00, 2.78474021e+00, 1.20669165e+01,
2.57065392e+00, 3.04297018e+00, 2.77586842e+00,
1.98068404e+00, 8.15478325e+00, 2.44499373e+00,
-7.69416380e+00, 6.07718563e+00, 6.94658184e+00,
6.12182474e+00, 5.02222061e+00, -6.13056946e+00,
2.61275077e+00, 6.75010145e-01, -1.18990774e+01,
6.34448004e+00, -1.04877586e+01, 3.03177810e+00,
9.69449803e-02, 4.94658089e+00, 1.79380786e+00,
5.66406059e+00, 6.23661661e+00, 2.60485530e+00,
5.38836861e+00, -9.76956749e+00, 2.70852590e+00,
-1.12050276e+01, 3.06299210e+00, 6.59300280e+00,
4.87766600e+00, 3.90205336e+00, -7.60238171e-01,
1.66826057e+00, 1.79832363e+00, 3.27109003e+00,
8.37677193e+00, 4.28591824e+00, 3.08773375e+00,
2.84103489e+00, 1.80365276e+00, 7.44261694e+00,
-8.07192612e+00, 3.13988066e+00, -1.06231203e+01,
2.37853694e+00, 1.08058643e+00, 4.65958118e+00,
5.91939497e+00], dtype=float32))
(u'Value_hidden_1_128_W:0', array([[ -1.25780666, -1.17092371, -1.25355208, ..., -0.93317389,
-7.35614252, -7.5642972 ],
[ 2.90516615, 0.25105655, 3.00166345, ..., 2.63310456,
7.22947454, 7.13979435],
[ -0.65097791, -0.87851095, -0.85896653, ..., -0.43622574,
-7.73628712, -7.86126614],
...,
[ 9.7937355 , -1.15326357, 10.1008215 , ..., 9.36998749,
6.19552994, 6.07367802],
[ 3.81829929, 0.20512164, 3.85689116, ..., 3.45021939,
7.263165 , 7.41106415],
[ -1.58820951, 0.64163786, -1.95892632, ..., -1.10761905,
10.46699333, 10.72078609]], dtype=float32))
(u'Value_hidden_1_128_b:0', array([ 0.84226799, 1.18302882, 1.1612705 , -0.07492125, -1.88058579,
0.13309792, 0.20504384, 9.60604191, 8.82534885, 0.85720533,
2.58401895, 8.69072723, 4.87706041, 9.80818748, 3.31918049,
1.21543324, -0.78764468, 0.78067315, 3.94023085, 3.445472 ,
0.89148057, -0.43656385, -0.50174987, -1.44359136, 0.9475199 ,
1.78578603, -0.53415048, 0.5647741 , 0.59887326, -1.507025 ,
1.08308601, -0.48177078, 0.91136318, 1.58355021, 2.40578175,
1.33149219, -0.27791637, 1.58377242, -0.07530674, 0.16699468,
-0.16718031, 0.87290472, -1.20745122, -1.23954892, 9.84553623,
0.52614009, 2.03862262, 0.3489002 , 7.67496824, 4.45513487,
0.89774954, 1.61138225, 1.9832418 , 8.29558372, -0.57602602,
1.53072011, 1.84700418, 0.27018818, 0.58351821, 1.55427396,
-0.91076845, 1.46978939, 1.23094845, 1.03668511, 0.93598187,
1.11400998, 0.90899134, -0.6142714 , 8.59216881, 2.46433353,
0.61918688, 0.86444366, 0.37071085, 3.43877673, 8.85885239,
8.61473274, 1.95188177, 0.20756646, 7.3170495 , 2.52595687,
-1.87919283, 0.16388896, 0.68811661, 1.12959158, -0.75186425,
0.15404917, -1.63201404, 3.74702382, 2.25460625, 0.1513105 ,
5.43347454, 1.8512876 , -0.34228662, 0.51394838, 0.83213943,
2.17330503, 0.98101383, 0.39970127, -0.46188936, 1.51158547,
8.18206596, 0.25980452, 0.79585361, 0.96771598, 1.32820559,
1.22119784, 2.83905244, 0.46498817, 7.50671005, 1.77912259,
0.1328591 , 0.01567538, -0.83439285, 3.48453498, 0.25987068,
-1.0048418 , -1.15252471, 5.61252213, 2.04233718, 0.07792626,
1.4678539 , 0.20005444, 9.33331108, 0.64773172, 1.65842187,
1.21791756, 9.45575428, 8.34985828], dtype=float32))
(u'Value_output_W:0', array([[ 14.08072376],
[ -6.91102791],
[ 13.70190334],
[ -7.07524157],
[ -8.7116518 ],
[ -6.8711791 ],
[ -7.95503187],
[ 11.36927891],
[ 11.23874569],
[ -7.12090158],
[ 12.71437359],
[ 11.33105946],
[ 13.76411057],
[ 11.24247074],
[ 12.41063881],
[-10.22958565],
[ 13.59835625],
[ 15.75961113],
[ 12.93144321],
[ 12.26033688],
[ -7.01160336],
[ -7.7703023 ],
[ -8.33419991],
[ -8.38042927],
[ 13.30167389],
[ -7.3757596 ],
[-10.33202744],
[ -8.19341278],
[-18.28740692],
[ -8.07747459],
[ 14.22387886],
[ -8.20703316],
[-18.28321266],
[ 12.95490742],
[ -6.85462809],
[ 11.81343555],
[ -8.01800823],
[ 13.40439224],
[ -7.87752485],
[ -7.19196653],
[ -8.23436642],
[ -8.16296196],
[ -7.91334057],
[ -7.80759048],
[ 11.23246098],
[ -8.19746304],
[ -7.22853374],
[ -6.88819075],
[ 10.94302273],
[ 12.93998909],
[ 15.80602932],
[ -9.32952309],
[-18.21645546],
[ 11.10361958],
[ -8.23092651],
[ 12.06726837],
[ -7.32524347],
[ -7.19428635],
[ -8.10025311],
[-18.34348679],
[ -6.86743212],
[ -6.88756371],
[ -6.92413425],
[ -7.26828575],
[-14.03055668],
[ -8.09986591],
[ 12.74128151],
[ 14.0692625 ],
[ 10.95670509],
[-13.74651051],
[ -8.36290169],
[ -9.30518055],
[ 14.36920261],
[ 13.94973946],
[ 11.23568726],
[ 11.2925024 ],
[ -9.28734684],
[ -8.22806454],
[ 11.464818 ],
[ 13.47125244],
[ -7.88527441],
[ -7.41429472],
[ -8.35400105],
[ -7.23832035],
[ -8.21488857],
[ -9.54511166],
[ -8.39470482],
[ 13.43649292],
[ -8.38013458],
[ 12.20474815],
[ 18.32191658],
[ 11.98346329],
[ -7.25662613],
[-12.78783703],
[ -6.79527378],
[ 12.38147736],
[-16.85344124],
[ -7.07673597],
[ -8.22855282],
[ -7.06379366],
[ 11.25438118],
[ -8.00475121],
[ -8.2504406 ],
[ -8.14599037],
[ -7.12892962],
[ 12.55915165],
[ 12.63164425],
[ 15.62143612],
[ 11.18899536],
[ 11.84920788],
[ -7.07597828],
[ -6.94098377],
[ 14.15165043],
[ 12.44956684],
[-17.26247597],
[ -8.13848877],
[ -8.34531498],
[ 13.27914715],
[ 13.06786728],
[ -6.9589138 ],
[ 11.8812542 ],
[ -7.76262903],
[ 11.25865936],
[ -7.25028324],
[ 14.01673603],
[ 15.85590935],
[ 11.06599045],
[ 11.36882591]], dtype=float32))
(u'Value_output_b:0', array([ 7.8088088], dtype=float32))
In [ ]:
Content source: karolkuna/reinforcement-learning
Similar notebooks: