In [1]:
%matplotlib inline

In [2]:
import gym
import tensorflow as tf
import numpy as np
import math
from tqdm import tqdm, trange
import random
import time

In [3]:
from experiencereplay import ExperienceReplay, PrioritizedExperienceReplay, ModelBasedPrioritizedExperienceReplay
from experiment import Experiment
from dmlac import DMLAC
import nn
from exploration import EpsilonGreedyStrategy, OUStrategy
from ounoise import OUNoise

In [4]:
# All hyper-parameters and run options for this experiment live in one dict,
# which is also handed to the Experiment object in the training cell.
settings = {
    # Environment id, total number of environment steps, replay/batch sizing.
    "environment": 'MountainCarContinuous-v0',
    "timesteps": 8000,
    "max_replay_buffer_size": 100000,
    "batch_size": 64,
    # Before this step the loop acts from the noise process and does not train.
    "learning_start": 256,
    "forward_steps": 1,
    "discount_factor": 0.99,
    "trace_decay": 0.9,

    # Learning rates; each one feeds the matching Adam optimizer built below.
    "actor_learning_rate": 0.0001,
    "model_learning_rate": 0.001,
    "reward_learning_rate": 0.001,
    "value_learning_rate": 0.001,

    # L2 settings forwarded to the agent (None for every component here).
    "actor_l2": None,
    "model_l2": None,
    "reward_l2": None,
    "value_l2": None,

    # Target-network approach rates forwarded to the agent.
    "actor_target_approach_rate": 0.99,
    "value_target_approach_rate": 0.99,

    # Per environment step: replay training updates and priority refreshes.
    "train_updates_per_step": 10,
    "priority_updates_per_step": 100,

    # Network shapes and activations, consumed by the `nn` builders.
    "actor_net_layers": [256, 128],
    "actor_net_activation_fn": tf.nn.elu,
    "actor_bounded_output": True,
    "value_net_layers": [256, 128],
    "value_net_activation_fn": tf.nn.elu,
    "model_net_embedding": 128,
    "model_net_layers": [128],
    "model_net_activation_fn": tf.nn.elu,
    "reward_net_embedding": 128,
    "reward_net_layers": [128],
    "reward_net_activation_fn": tf.nn.elu,

    # Seeds for the Gym environment and for the exploration noise process.
    "environment_seed": 0,
    "noise_seed": 0,

    # Passed to tf.GPUOptions as per_process_gpu_memory_fraction.
    "gpu_memory_fraction": 0.1,

    # Rendering options, passed to Experiment.
    "render_environment": True,
    "render_frequency": 10,
    "render_start": 7000,
}

# Output directory is unique per run: environment id plus the current Unix time.
settings["experiment_path"] = "experiments/experiment_dmlac_{}_{}".format(
    settings["environment"], int(time.time()))

# One Adam optimizer per trainable component, each with its own learning rate.
settings.update({
    "actor_tf_optimizer": tf.train.AdamOptimizer(settings["actor_learning_rate"]),
    "model_tf_optimizer": tf.train.AdamOptimizer(settings["model_learning_rate"]),
    "reward_tf_optimizer": tf.train.AdamOptimizer(settings["reward_learning_rate"]),
    "value_tf_optimizer": tf.train.AdamOptimizer(settings["value_learning_rate"]),
})

print(settings["experiment_path"])


experiments/experiment_dmlac_MountainCarContinuous-v0_1495409405

In [5]:
def preprocess_state(observation):
    """Return a NumPy copy of `observation`, rescaled for the configured environment.

    For "MountainCarContinuous-v0" the element at index 1 is multiplied by 10;
    for any other environment the copied array is returned unchanged.
    The input object itself is never modified (np.array makes a copy).
    """
    scaled = np.array(observation)
    # Guard clause: only MountainCarContinuous gets the index-1 rescaling.
    if settings["environment"] != "MountainCarContinuous-v0":
        return scaled
    scaled[1] *= 10
    return scaled

def preprocess_reward(reward):
    """Hook for reward shaping; currently passes the reward through unchanged."""
    return reward

In [6]:
# Build the Gym environment named in the settings and seed it.
env = gym.make(settings["environment"])
env.seed(settings["environment_seed"])

# The first preprocessed observation is also the starting `state` of the training loop.
state = observation = preprocess_state(env.reset())


[2017-05-22 01:30:05,805] Making new env: MountainCarContinuous-v0

In [7]:
# First entry of the observation-space and action-space shapes.
state_dim, action_dim = env.observation_space.shape[0], env.action_space.shape[0]

# Echo the two dimensions and the bounds of both spaces, one value per line.
print("\n".join(str(item) for item in (
    state_dim,
    action_dim,
    env.observation_space.high,
    env.observation_space.low,
    env.action_space.high,
    env.action_space.low,
)))


2
1
[ 0.6   0.07]
[-1.2  -0.07]
[ 1.]
[-1.]

In [8]:
# Limit this process to the configured fraction of GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=settings["gpu_memory_fraction"])
# TensorFlow session shared by the networks, the agent and the Experiment object.
session = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
# Summary writer targeting the "logdir" sub-directory of this run's experiment path.
summary_writer = tf.summary.FileWriter(settings["experiment_path"] + "/logdir")

In [9]:
# Build the four networks used by the agent with the project-local `nn` helpers.
# NOTE(review): the meaning of each positional argument (including the trailing
# None / False) is defined in nn.py, which is not visible here -- confirm there.

# Actor: input of size state_dim, output of size action_dim. The action space is
# passed only when actor_bounded_output is set; otherwise None is passed
# (presumably this drives the "bounding" layer shown in the printout -- verify).
actor_network = nn.fully_connected("Actor", session, [state_dim], settings["actor_net_layers"],
                                   action_dim, settings["actor_net_activation_fn"],
                                   env.action_space if settings["actor_bounded_output"] else None, False)
# Model: inputs of size state_dim and action_dim, output of size state_dim.
model_network = nn.fully_connected_with_input_embedding(
    "Model", session, [state_dim, action_dim], settings["model_net_embedding"], settings["model_net_layers"], state_dim,
     settings["model_net_activation_fn"], None, False)
# Reward: inputs of size state_dim and action_dim, output of size 1.
reward_network = nn.fully_connected_with_input_embedding(
    "Reward", session, [state_dim, action_dim], settings["reward_net_embedding"], settings["reward_net_layers"], 1,
     settings["reward_net_activation_fn"], None, False)
# Value: input of size state_dim, output of size 1.
value_network = nn.fully_connected("Value", session, [state_dim], settings["value_net_layers"], 1,
                               settings["value_net_activation_fn"], None, False)
# Print the actor network's string representation (layer wiring) as a sanity check.
print(str(actor_network))


[] --> Actor_input_0
['Actor_input_0'] --> hidden_0_256
['hidden_0_256'] --> hidden_1_128
['hidden_1_128'] --> output
['output'] --> bounding


In [10]:
# Construct the DMLAC agent from the four networks. Every listed hyper-parameter
# is forwarded from `settings` as a keyword argument of the same name.
agent = DMLAC(
    actor_network, model_network, reward_network, value_network,
    summary_writer=summary_writer,
    **{name: settings[name] for name in (
        "forward_steps", "discount_factor", "trace_decay",
        "actor_tf_optimizer", "model_tf_optimizer",
        "reward_tf_optimizer", "value_tf_optimizer",
        "actor_l2", "model_l2", "reward_l2", "value_l2",
        "actor_target_approach_rate", "value_target_approach_rate",
    )}
)

In [11]:
# Checkpoint saver, created after the networks and agent have been built.
# It is not used again in the cells visible here.
saver = tf.train.Saver()
# Write the session's graph to the summary log directory.
summary_writer.add_graph(session.graph)

In [12]:
# Replay buffer capped at max_replay_buffer_size. It is given the agent and the
# environment, and the training loop calls it to store transitions, train the
# agent and refresh priorities.
experience_replay = ModelBasedPrioritizedExperienceReplay(agent, env, settings["max_replay_buffer_size"], episodic=True)

In [13]:
# Seeded noise process sized to the action dimension; the training loop draws
# its actions from it until `learning_start` steps have elapsed.
noise = OUNoise(action_dim, seed=settings["noise_seed"])

In [14]:
# Experiment bookkeeping object: receives the output path, session, environment,
# the full settings dict and the three rendering options.
exp = Experiment(settings["experiment_path"], session, env, settings, settings["render_environment"], settings["render_frequency"], settings["render_start"])

progress_bar = tqdm(total=settings["timesteps"])

# Step counter within the current episode; maintained here but not read in this cell.
e_t = -1
try:
    # `range` instead of the Python-2-only `xrange`, so the cell runs on Python 2 and 3.
    for t in range(settings["timesteps"]):
        e_t += 1

        # Warm-up: act from the noise process alone until learning starts,
        # afterwards ask the agent for an action.
        if t < settings["learning_start"]:
            action = noise.noise()
        else:
            action = agent.action(state)

        observation, reward, done, info = env.step(action)
        next_state = np.reshape(preprocess_state(observation), (state_dim,))

        # Store the transition, then read back the error figures the replay
        # buffer reports for it (the TD error is shown as an absolute value).
        experience_replay.add_experience(state, action, preprocess_reward(reward), next_state, done)
        td_error = math.fabs(experience_replay.get_last_td_error())
        model_error = experience_replay.get_last_model_error()
        reward_error = experience_replay.get_last_reward_error()

        exp.record(t, state, action, reward, next_state, done, td_error, model_error, reward_error)

        state = next_state

        if done:
            # Episode finished: report it and start a new one.
            exp.print_last_episode_info()
            observation = env.reset()
            # Same preprocessing and shape as `next_state` above, so every state
            # handed to the agent and the replay buffer has shape (state_dim,).
            state = np.reshape(preprocess_state(observation), (state_dim,))
            e_t = -1

        # Training and priority refresh only run once the warm-up phase is over.
        if t >= settings["learning_start"]:
            experience_replay.train_agent(settings["batch_size"], settings["train_updates_per_step"])
            experience_replay.update_oldest_priorities(settings["priority_updates_per_step"])

        progress_bar.set_description(
            '[{}] reward: {:.2f}, reward 100-step MA: {:.2f}, action: {}, td-error: {:.4f}, model error: {:.4f}, reward error: {:.4f}'
            .format(t, reward, exp.reward_100ma.get_average(), str(action), td_error, model_error, reward_error))
        progress_bar.update()
finally:
    # Close the bar even when the run is interrupted or raises, so a re-run of
    # this cell does not leave a stale progress bar behind.
    progress_bar.close()


[174] reward: -0.00, reward 100-step MA: 0.98, action: [-0.07092898], td-error: 0.0000, model error: 0.0031, reward error: 2.0657:   2%|▏         | 175/8000 [00:01<06:54, 18.87it/s]    | 1/8000 [00:00<56:34,  2.36it/s]
Total episode reward: 96.4231978292. Finished in 130 steps.
[306] reward: -0.10, reward 100-step MA: -0.05, action: [ 0.98711097], td-error: 25.2040, model error: 0.0001, reward error: 618.8727:   4%|▍         | 307/8000 [00:10<15:50,  8.10it/s]] reward: -0.01, reward 100-step MA: -0.01, action: [-0.3637961], td-error: 0.0000, model error: 0.0507, reward error: 2.5737:   3%|▎         | 234/8000 [00:01<03:35, 36.09it/s]
Total episode reward: 93.5231650728. Finished in 179 steps.
[1695] reward: 99.91, reward 100-step MA: 0.92, action: [ 0.95290232], td-error: 134.8447, model error: 0.0000, reward error: 16067.3672:  21%|██        | 1696/8000 [04:35<20:29,  5.13it/s]] reward: -0.01, reward 100-step MA: 0.95, action: [ 0.23008931], td-error: 0.0000, model error: 0.0000, reward error: 6.6592:   4%|▍         | 321/8000 [00:12<18:29,  6.92it/s]
Total episode reward: -13.8869715649. Finished in 1387 steps.
[1819] reward: 99.99, reward 100-step MA: 0.85, action: [ 0.3119626], td-error: 782.9854, model error: 0.0024, reward error: 12847.1084:  23%|██▎       | 1820/8000 [04:59<19:57,  5.16it/s] [1697] reward: -0.02, reward 100-step MA: 0.92, action: [-0.41467035], td-error: 0.0000, model error: 0.0018, reward error: 154.1618:  21%|██        | 1698/8000 [04:35<20:14,  5.19it/s]
Total episode reward: 84.1286960661. Finished in 124 steps.
[2150] reward: 99.60, reward 100-step MA: 0.89, action: [ 1.98852909], td-error: 543.6807, model error: 0.0001, reward error: 1584.3918:  27%|██▋       | 2151/8000 [06:04<19:17,  5.05it/s][1821] reward: -0.05, reward 100-step MA: 0.85, action: [-0.71537304], td-error: 0.0000, model error: 0.0034, reward error: 1623.8997:  23%|██▎       | 1822/8000 [04:59<19:50,  5.19it/s]
Total episode reward: 63.6965154201. Finished in 331 steps.
[2314] reward: 99.90, reward 100-step MA: 0.94, action: [ 1.01177847], td-error: 0.0000, model error: 0.0017, reward error: 864.4659:  29%|██▉       | 2315/8000 [06:36<18:35,  5.10it/s]   [2152] reward: -0.01, reward 100-step MA: 0.89, action: [-0.34694254], td-error: 30.0261, model error: 0.0011, reward error: 3.0154:  27%|██▋       | 2153/8000 [06:04<19:12,  5.07it/s]
Total episode reward: 92.4477668174. Finished in 164 steps.
[2435] reward: 99.87, reward 100-step MA: 0.93, action: [ 1.15491557], td-error: 187.1152, model error: 0.0005, reward error: 59.9050:  30%|███       | 2436/8000 [07:00<18:21,  5.05it/s] 316] reward: -0.08, reward 100-step MA: 0.94, action: [ 0.89678347], td-error: 0.0000, model error: 0.0005, reward error: 23.1924:  29%|██▉       | 2317/8000 [06:37<19:20,  4.90it/s]
Total episode reward: 91.3727454161. Finished in 121 steps.
[2518] reward: 99.83, reward 100-step MA: 1.93, action: [ 1.29782808], td-error: 604.0186, model error: 0.0001, reward error: 4392.6660:  31%|███▏      | 2519/8000 [07:17<18:21,  4.97it/s]437] reward: -0.04, reward 100-step MA: 0.94, action: [-0.61635572], td-error: 0.0000, model error: 0.0013, reward error: 0.5953:  30%|███       | 2438/8000 [07:01<18:14,  5.08it/s] 
Total episode reward: 95.0047661277. Finished in 83 steps.
[2659] reward: 99.91, reward 100-step MA: 0.94, action: [ 0.94806051], td-error: 214.4082, model error: 0.0003, reward error: 622.4498:  33%|███▎      | 2660/8000 [07:45<17:38,  5.05it/s] [2520] reward: -0.08, reward 100-step MA: 1.93, action: [-0.90444177], td-error: 40.6895, model error: 0.0005, reward error: 0.0553:  32%|███▏      | 2521/8000 [07:17<18:14,  5.00it/s] 
Total episode reward: 91.8093924948. Finished in 141 steps.
[2735] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99506879], td-error: 290.3008, model error: 0.0002, reward error: 399.9548:  34%|███▍      | 2736/8000 [08:00<17:22,  5.05it/s][2661] reward: -0.03, reward 100-step MA: 0.94, action: [-0.53973788], td-error: 0.0000, model error: 0.0021, reward error: 0.0532:  33%|███▎      | 2662/8000 [07:45<17:45,  5.01it/s]
Total episode reward: 94.5837537746. Finished in 76 steps.
[2879] reward: 99.91, reward 100-step MA: 0.95, action: [ 0.9718256], td-error: 942.3574, model error: 0.0000, reward error: 243.0125:  36%|███▌      | 2880/8000 [08:28<17:01,  5.01it/s]  2737] reward: -0.05, reward 100-step MA: 1.92, action: [-0.69772553], td-error: 12.2798, model error: 0.0004, reward error: 0.5264:  34%|███▍      | 2738/8000 [08:00<17:08,  5.12it/s]
Total episode reward: 93.6874602827. Finished in 144 steps.
[2954] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.9912833], td-error: 801.2500, model error: 0.0001, reward error: 1121.4849:  37%|███▋      | 2955/8000 [08:43<16:56,  4.97it/s]2881] reward: -0.07, reward 100-step MA: 0.95, action: [-0.81734598], td-error: 3.5518, model error: 0.0003, reward error: 0.1470:  36%|███▌      | 2882/8000 [08:29<16:50,  5.06it/s]
Total episode reward: 95.2824589813. Finished in 75 steps.
[3099] reward: 99.90, reward 100-step MA: 0.95, action: [ 0.98041534], td-error: 1076.8770, model error: 0.0007, reward error: 272.2931:  39%|███▉      | 3100/8000 [09:12<16:34,  4.93it/s]2956] reward: -0.09, reward 100-step MA: 1.93, action: [-0.94886076], td-error: 46.8257, model error: 0.0001, reward error: 15.4871:  37%|███▋      | 2957/8000 [08:44<17:00,  4.94it/s]
Total episode reward: 93.7055666543. Finished in 145 steps.
[3180] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.98563802], td-error: 284.2695, model error: 0.0007, reward error: 34.6130:  40%|███▉      | 3181/8000 [09:28<16:05,  4.99it/s]  [3101] reward: -0.14, reward 100-step MA: 0.95, action: [-1.16526079], td-error: 0.0000, model error: 0.0018, reward error: 0.0202:  39%|███▉      | 3102/8000 [09:13<16:19,  5.00it/s]  
Total episode reward: 94.8222190916. Finished in 81 steps.
[3255] reward: 99.90, reward 100-step MA: 1.93, action: [ 1.00092041], td-error: 1437.4727, model error: 0.0002, reward error: 5479.7212:  41%|████      | 3256/8000 [09:43<15:44,  5.02it/s]82] reward: -0.10, reward 100-step MA: 1.93, action: [-0.9911927], td-error: 12.9302, model error: 0.0002, reward error: 0.0260:  40%|███▉      | 3183/8000 [09:29<16:08,  4.97it/s]  
Total episode reward: 95.3206187029. Finished in 75 steps.
[3329] reward: -0.10, reward 100-step MA: 0.92, action: [ 0.99487031], td-error: 655.1602, model error: 0.0001, reward error: 40.0274:  42%|████▏     | 3330/8000 [09:58<15:33,  5.00it/s]   [3257] reward: -0.11, reward 100-step MA: 1.93, action: [-1.06746531], td-error: 19.7520, model error: 0.0000, reward error: 56.3410:  41%|████      | 3258/8000 [09:44<15:52,  4.98it/s] 
Total episode reward: 95.030240833. Finished in 75 steps.
[3409] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98966575], td-error: 503.1699, model error: 0.0003, reward error: 188.7018:  43%|████▎     | 3410/8000 [10:14<15:28,  4.95it/s]][3331] reward: -0.09, reward 100-step MA: 1.92, action: [-0.94382316], td-error: 0.0000, model error: 0.0006, reward error: 0.9238:  42%|████▏     | 3332/8000 [09:59<15:43,  4.95it/s]     
Total episode reward: 94.5742948659. Finished in 79 steps.
[3483] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.97474086], td-error: 918.9492, model error: 0.0002, reward error: 6.7982:  44%|████▎     | 3484/8000 [10:29<15:09,  4.97it/s]  [3411] reward: -0.11, reward 100-step MA: 1.92, action: [-1.03424931], td-error: 139.5518, model error: 0.0000, reward error: 0.4133:  43%|████▎     | 3412/8000 [10:15<15:19,  4.99it/s]
Total episode reward: 95.0923479071. Finished in 74 steps.
[3631] reward: 99.90, reward 100-step MA: 0.95, action: [ 1.00145221], td-error: 902.7031, model error: 0.0004, reward error: 1497.1527:  45%|████▌     | 3632/8000 [10:59<14:49,  4.91it/s]85] reward: -0.10, reward 100-step MA: 1.92, action: [-0.98203981], td-error: 0.0000, model error: 0.0001, reward error: 0.2068:  44%|████▎     | 3486/8000 [10:30<15:10,  4.96it/s]
Total episode reward: 92.6124776109. Finished in 148 steps.
[3708] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.97811317], td-error: 1086.6914, model error: 0.0006, reward error: 506.8153:  46%|████▋     | 3709/8000 [11:14<14:43,  4.86it/s][3633] reward: -0.11, reward 100-step MA: 0.95, action: [-1.02785206], td-error: 307.9521, model error: 0.0000, reward error: 27.7952:  45%|████▌     | 3634/8000 [10:59<14:56,  4.87it/s]
Total episode reward: 95.1492344202. Finished in 77 steps.
[3787] reward: 99.90, reward 100-step MA: 1.93, action: [ 0.99585497], td-error: 2309.5820, model error: 0.0005, reward error: 65.9612:  47%|████▋     | 3788/8000 [11:30<14:24,  4.87it/s] [3710] reward: -0.11, reward 100-step MA: 1.93, action: [-1.04609823], td-error: 256.3818, model error: 0.0000, reward error: 15.2844:  46%|████▋     | 3711/8000 [11:15<14:37,  4.89it/s]
Total episode reward: 94.8678545986. Finished in 79 steps.
[3949] reward: 99.90, reward 100-step MA: 0.94, action: [ 1.00653851], td-error: 1091.6953, model error: 0.0009, reward error: 582.3171:  49%|████▉     | 3950/8000 [12:03<13:59,  4.83it/s]  89] reward: -0.10, reward 100-step MA: 1.93, action: [-0.98602957], td-error: 307.8262, model error: 0.0001, reward error: 0.4177:  47%|████▋     | 3790/8000 [11:31<14:01,  5.00it/s]
Total episode reward: 90.3256358579. Finished in 162 steps.
[4029] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98892784], td-error: 2208.8516, model error: 0.0003, reward error: 1473.5466:  50%|█████     | 4030/8000 [12:19<13:27,  4.91it/s]3951] reward: -0.11, reward 100-step MA: 0.94, action: [-1.06735146], td-error: 108.5859, model error: 0.0001, reward error: 6.5596:  49%|████▉     | 3952/8000 [12:03<13:45,  4.91it/s]
Total episode reward: 94.2026321658. Finished in 80 steps.
[4103] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99534035], td-error: 1083.8281, model error: 0.0007, reward error: 339.0142:  51%|█████▏    | 4104/8000 [12:34<13:13,  4.91it/s] [4031] reward: -0.11, reward 100-step MA: 1.92, action: [-1.03191257], td-error: 35.2227, model error: 0.0001, reward error: 0.4022:  50%|█████     | 4032/8000 [12:20<13:18,  4.97it/s]
Total episode reward: 94.4505720697. Finished in 74 steps.
[4176] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.9959271], td-error: 4162.9219, model error: 0.0002, reward error: 1242.0801:  52%|█████▏    | 4177/8000 [12:49<12:38,  5.04it/s][4105] reward: -0.11, reward 100-step MA: 1.92, action: [-1.06371951], td-error: 100.0859, model error: 0.0001, reward error: 4.1703:  51%|█████▏    | 4106/8000 [12:35<13:03,  4.97it/s]
Total episode reward: 94.6139781381. Finished in 73 steps.
[4248] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.99742329], td-error: 3727.8125, model error: 0.0001, reward error: 209.8219:  53%|█████▎    | 4249/8000 [13:04<12:31,  4.99it/s][4178] reward: -0.11, reward 100-step MA: 1.92, action: [-1.04347324], td-error: 292.0566, model error: 0.0002, reward error: 0.3191:  52%|█████▏    | 4179/8000 [12:49<12:43,  5.00it/s]
Total episode reward: 94.5663274728. Finished in 72 steps.
[4319] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00802863], td-error: 2581.1562, model error: 0.0003, reward error: 226.0249:  54%|█████▍    | 4320/8000 [13:17<10:47,  5.69it/s][4250] reward: -0.12, reward 100-step MA: 1.92, action: [-1.10661185], td-error: 234.4160, model error: 0.0000, reward error: 0.5828:  53%|█████▎    | 4251/8000 [13:04<12:36,  4.95it/s]
Total episode reward: 94.3286504071. Finished in 71 steps.
[4394] reward: 99.90, reward 100-step MA: 1.92, action: [ 0.98692739], td-error: 3062.4141, model error: 0.0001, reward error: 36.8419:  55%|█████▍    | 4395/8000 [13:30<10:38,  5.65it/s] [4321] reward: -0.07, reward 100-step MA: 1.92, action: [-0.83494484], td-error: 0.0000, model error: 0.0001, reward error: 0.5059:  54%|█████▍    | 4322/8000 [13:17<10:42,  5.73it/s]
Total episode reward: 94.0069662674. Finished in 75 steps.
[4465] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97635305], td-error: 3472.7969, model error: 0.0001, reward error: 0.1824:  56%|█████▌    | 4466/8000 [13:43<10:42,  5.50it/s] [4396] reward: -0.12, reward 100-step MA: 1.92, action: [-1.10470295], td-error: 358.2227, model error: 0.0000, reward error: 0.9168:  55%|█████▍    | 4397/8000 [13:31<10:29,  5.72it/s]
Total episode reward: 94.3249927012. Finished in 71 steps.
[4537] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.02158332], td-error: 1359.8125, model error: 0.0003, reward error: 143.6627:  57%|█████▋    | 4538/8000 [13:56<10:13,  5.64it/s]467] reward: -0.09, reward 100-step MA: 1.91, action: [-0.94578326], td-error: 0.0000, model error: 0.0002, reward error: 2.2365:  56%|█████▌    | 4468/8000 [13:43<10:40,  5.51it/s]  
Total episode reward: 94.1926985075. Finished in 72 steps.
[4609] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98690319], td-error: 4752.4531, model error: 0.0000, reward error: 0.9344:  58%|█████▊    | 4610/8000 [14:08<09:56,  5.68it/s]  [4539] reward: -0.08, reward 100-step MA: 1.91, action: [-0.90795952], td-error: 47.3906, model error: 0.0002, reward error: 0.0458:  57%|█████▋    | 4540/8000 [13:56<10:08,  5.69it/s]  
Total episode reward: 94.0130174927. Finished in 72 steps.
[4680] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98882532], td-error: 3905.5938, model error: 0.0002, reward error: 14.7974:  59%|█████▊    | 4681/8000 [14:21<09:46,  5.66it/s]4611] reward: -0.10, reward 100-step MA: 1.91, action: [-1.00080895], td-error: 0.0000, model error: 0.0003, reward error: 0.4150:  58%|█████▊    | 4612/8000 [14:09<09:55,  5.69it/s]  
Total episode reward: 93.7941235496. Finished in 71 steps.
[4750] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00387061], td-error: 5898.9062, model error: 0.0002, reward error: 19.8732:  59%|█████▉    | 4751/8000 [14:33<09:33,  5.67it/s][4682] reward: -0.11, reward 100-step MA: 1.91, action: [-1.02825725], td-error: 544.4648, model error: 0.0000, reward error: 0.0009:  59%|█████▊    | 4683/8000 [14:21<09:42,  5.70it/s]
Total episode reward: 93.7409767543. Finished in 70 steps.
[4821] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98664117], td-error: 7844.2812, model error: 0.0001, reward error: 116.7266:  60%|██████    | 4822/8000 [14:46<09:18,  5.69it/s]4752] reward: -0.12, reward 100-step MA: 1.91, action: [-1.08858573], td-error: 1490.3945, model error: 0.0000, reward error: 7.6123:  59%|█████▉    | 4753/8000 [14:34<09:38,  5.61it/s]
Total episode reward: 94.101249705. Finished in 71 steps.
[4897] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99818778], td-error: 8098.2188, model error: 0.0002, reward error: 0.3148:  61%|██████    | 4898/8000 [14:59<09:13,  5.60it/s]  [4823] reward: -0.10, reward 100-step MA: 1.91, action: [-1.02244425], td-error: 941.8750, model error: 0.0000, reward error: 1.5026:  60%|██████    | 4824/8000 [14:46<09:17,  5.70it/s]
Total episode reward: 93.3849713019. Finished in 76 steps.
[4973] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99267745], td-error: 0.0000, model error: 0.0007, reward error: 209.5989:  62%|██████▏   | 4974/8000 [15:13<08:55,  5.65it/s]   899] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01576245], td-error: 132.7969, model error: 0.0001, reward error: 2.7258:  61%|██████▏   | 4900/8000 [15:00<09:11,  5.63it/s]
Total episode reward: 93.1590904891. Finished in 76 steps.
[5045] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00971055], td-error: 725.8438, model error: 0.0005, reward error: 1.1238:  63%|██████▎   | 5046/8000 [15:26<08:55,  5.52it/s]   975] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01912498], td-error: 1398.4453, model error: 0.0001, reward error: 0.0080:  62%|██████▏   | 4976/8000 [15:13<09:04,  5.56it/s]
Total episode reward: 93.8844160348. Finished in 72 steps.
[5118] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9998523], td-error: 10294.0312, model error: 0.0001, reward error: 32.2812:  64%|██████▍   | 5119/8000 [15:39<08:37,  5.57it/s] 47] reward: -0.10, reward 100-step MA: 1.91, action: [-0.98514611], td-error: 0.0000, model error: 0.0002, reward error: 0.0349:  63%|██████▎   | 5048/8000 [15:26<08:55,  5.51it/s]  
Total episode reward: 93.4291041701. Finished in 73 steps.
[5189] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99092078], td-error: 5854.1875, model error: 0.0003, reward error: 111.8748:  65%|██████▍   | 5190/8000 [15:51<08:20,  5.62it/s]5120] reward: -0.10, reward 100-step MA: 1.91, action: [-0.98187017], td-error: 981.4922, model error: 0.0000, reward error: 0.2890:  64%|██████▍   | 5121/8000 [15:39<08:30,  5.64it/s]
Total episode reward: 93.6595448984. Finished in 71 steps.
[5260] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00127995], td-error: 1309.2500, model error: 0.0006, reward error: 6.6576:  66%|██████▌   | 5261/8000 [16:04<08:06,  5.63it/s]  [5191] reward: -0.08, reward 100-step MA: 1.91, action: [-0.87972558], td-error: 533.0391, model error: 0.0001, reward error: 13.9041:  65%|██████▍   | 5192/8000 [15:52<08:15,  5.67it/s]
Total episode reward: 93.7073365383. Finished in 71 steps.
[5330] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.01399302], td-error: 5317.9688, model error: 0.0002, reward error: 0.5838:  67%|██████▋   | 5331/8000 [16:16<07:59,  5.57it/s]  262] reward: -0.06, reward 100-step MA: 1.91, action: [-0.78802913], td-error: 0.0000, model error: 0.0004, reward error: 0.4686:  66%|██████▌   | 5263/8000 [16:04<08:07,  5.62it/s] 
Total episode reward: 93.7653968748. Finished in 70 steps.
[5401] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97575021], td-error: 24652.0000, model error: 0.0002, reward error: 101.4299:  68%|██████▊   | 5402/8000 [16:29<07:48,  5.54it/s]32] reward: -0.11, reward 100-step MA: 1.91, action: [-1.04186809], td-error: 2283.7578, model error: 0.0000, reward error: 0.0243:  67%|██████▋   | 5333/8000 [16:17<07:55,  5.61it/s]
Total episode reward: 93.6519493879. Finished in 71 steps.
[5497] reward: 99.99, reward 100-step MA: 1.94, action: [ 0.33883393], td-error: 0.0000, model error: 0.0002, reward error: 3022.5708:  69%|██████▊   | 5498/8000 [16:46<07:26,  5.60it/s]   [5403] reward: -0.02, reward 100-step MA: 1.91, action: [-0.42040992], td-error: 0.0000, model error: 0.0000, reward error: 0.3995:  68%|██████▊   | 5404/8000 [16:29<07:46,  5.57it/s]
Total episode reward: 94.2030324598. Finished in 96 steps.
[5569] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99711835], td-error: 19448.1875, model error: 0.0001, reward error: 64.4392:  70%|██████▉   | 5570/8000 [16:59<07:16,  5.57it/s]499] reward: -0.05, reward 100-step MA: 1.94, action: [-0.70899582], td-error: 0.0000, model error: 0.0001, reward error: 4.5246:  69%|██████▉   | 5500/8000 [16:46<07:24,  5.62it/s]
Total episode reward: 93.5501463559. Finished in 72 steps.
[5642] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.021644], td-error: 17567.5625, model error: 0.0001, reward error: 49.9054:  71%|███████   | 5643/8000 [17:12<07:02,  5.58it/s]  [5571] reward: -0.11, reward 100-step MA: 1.91, action: [-1.02588367], td-error: 3794.4844, model error: 0.0000, reward error: 1.4887:  70%|██████▉   | 5572/8000 [16:59<07:12,  5.62it/s]
Total episode reward: 93.389277264. Finished in 73 steps.
[5715] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.99834955], td-error: 16039.4375, model error: 0.0000, reward error: 16.5300:  71%|███████▏  | 5716/8000 [17:25<06:47,  5.61it/s]644] reward: -0.04, reward 100-step MA: 1.91, action: [-0.62854755], td-error: 0.0000, model error: 0.0000, reward error: 0.0202:  71%|███████   | 5645/8000 [17:12<07:00,  5.59it/s]   
Total episode reward: 93.380181634. Finished in 73 steps.
[5787] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00111783], td-error: 39353.5000, model error: 0.0004, reward error: 79.6178:  72%|███████▏  | 5788/8000 [17:38<06:38,  5.55it/s][5717] reward: -0.11, reward 100-step MA: 1.91, action: [-1.05010939], td-error: 0.0000, model error: 0.0009, reward error: 0.0698:  71%|███████▏  | 5718/8000 [17:25<06:47,  5.59it/s]   
Total episode reward: 93.339612244. Finished in 72 steps.
[5859] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00086856], td-error: 17437.3125, model error: 0.0001, reward error: 0.3788:  73%|███████▎  | 5860/8000 [17:51<06:27,  5.52it/s] [5789] reward: -0.09, reward 100-step MA: 1.91, action: [-0.93721908], td-error: 1578.4375, model error: 0.0000, reward error: 2.3280:  72%|███████▏  | 5790/8000 [17:38<06:33,  5.62it/s]
Total episode reward: 93.4486696729. Finished in 72 steps.
[5931] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00103521], td-error: 20909.2500, model error: 0.0002, reward error: 0.1838:  74%|███████▍  | 5932/8000 [18:04<06:10,  5.59it/s] 5861] reward: -0.00, reward 100-step MA: 1.91, action: [-0.22270525], td-error: 0.0000, model error: 0.0000, reward error: 0.1423:  73%|███████▎  | 5862/8000 [17:51<06:28,  5.51it/s]
Total episode reward: 93.4269928606. Finished in 72 steps.
[6002] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00126088], td-error: 23932.3750, model error: 0.0001, reward error: 0.0883:  75%|███████▌  | 6003/8000 [18:16<06:08,  5.42it/s] 5933] reward: -0.11, reward 100-step MA: 1.91, action: [-1.04868412], td-error: 4970.4219, model error: 0.0001, reward error: 0.1952:  74%|███████▍  | 5934/8000 [18:04<06:12,  5.55it/s]
Total episode reward: 93.3333530103. Finished in 71 steps.
[6114] reward: 99.90, reward 100-step MA: 0.92, action: [ 1.01420605], td-error: 23040.3750, model error: 0.0004, reward error: 2.1225:  76%|███████▋  | 6115/8000 [18:36<05:38,  5.58it/s]  004] reward: -0.01, reward 100-step MA: 1.91, action: [-0.27053785], td-error: 0.0000, model error: 0.0001, reward error: 5.3422:  75%|███████▌  | 6005/8000 [18:17<06:10,  5.39it/s]
Total episode reward: 91.6741434448. Finished in 112 steps.
[6188] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.98921239], td-error: 25949.4375, model error: 0.0003, reward error: 75.4884:  77%|███████▋  | 6189/8000 [18:50<05:29,  5.50it/s]6116] reward: -0.03, reward 100-step MA: 0.92, action: [-0.51721132], td-error: 0.0000, model error: 0.0000, reward error: 7.7200:  76%|███████▋  | 6117/8000 [18:37<05:41,  5.51it/s]
Total episode reward: 93.5686943771. Finished in 74 steps.
[6260] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.01850045], td-error: 38120.8750, model error: 0.0000, reward error: 78.2413:  78%|███████▊  | 6261/8000 [19:03<05:10,  5.60it/s][6190] reward: -0.11, reward 100-step MA: 1.91, action: [-1.03821003], td-error: 4561.7812, model error: 0.0000, reward error: 0.6163:  77%|███████▋  | 6191/8000 [18:50<05:28,  5.50it/s]
Total episode reward: 93.1345810918. Finished in 72 steps.
[6331] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9989115], td-error: 39253.7500, model error: 0.0001, reward error: 3.2541:  79%|███████▉  | 6332/8000 [19:16<05:00,  5.55it/s]  [6262] reward: -0.05, reward 100-step MA: 1.90, action: [-0.70389664], td-error: 0.0000, model error: 0.0001, reward error: 0.2201:  78%|███████▊  | 6263/8000 [19:03<05:10,  5.60it/s]
Total episode reward: 93.4222280927. Finished in 71 steps.
[6403] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.97811031], td-error: 22837.6250, model error: 0.0001, reward error: 1650.8687:  80%|████████  | 6404/8000 [19:29<04:50,  5.50it/s]3] reward: -0.09, reward 100-step MA: 1.91, action: [-0.93731892], td-error: 2974.4062, model error: 0.0002, reward error: 2.7135:  79%|███████▉  | 6334/8000 [19:16<04:58,  5.58it/s]
Total episode reward: 93.3922931207. Finished in 72 steps.
[6474] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00635314], td-error: 18923.6250, model error: 0.0005, reward error: 12.2901:  81%|████████  | 6475/8000 [19:41<04:34,  5.55it/s]  [6405] reward: -0.09, reward 100-step MA: 1.91, action: [-0.94413006], td-error: 178.2500, model error: 0.0000, reward error: 2.6618:  80%|████████  | 6406/8000 [19:29<04:49,  5.51it/s]
Total episode reward: 93.4017595511. Finished in 71 steps.
[6545] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00275207], td-error: 32210.8750, model error: 0.0002, reward error: 67.2621:  82%|████████▏ | 6546/8000 [19:54<04:23,  5.51it/s][6476] reward: -0.10, reward 100-step MA: 1.91, action: [-1.01411188], td-error: 4951.7812, model error: 0.0000, reward error: 0.2380:  81%|████████  | 6477/8000 [19:42<04:32,  5.58it/s]
Total episode reward: 93.2676075274. Finished in 71 steps.
[6617] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00184715], td-error: 0.0000, model error: 0.0021, reward error: 9.8404:  83%|████████▎ | 6618/8000 [20:07<04:09,  5.54it/s]     [6547] reward: -0.10, reward 100-step MA: 1.90, action: [-1.01907206], td-error: 2401.1875, model error: 0.0004, reward error: 0.0280:  82%|████████▏ | 6548/8000 [19:55<04:23,  5.51it/s] 
Total episode reward: 93.2235003289. Finished in 72 steps.
[6689] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00427401], td-error: 30074.0000, model error: 0.0003, reward error: 2516.7756:  84%|████████▎ | 6690/8000 [20:20<03:58,  5.49it/s]reward: -0.10, reward 100-step MA: 1.90, action: [-1.02055562], td-error: 12882.8125, model error: 0.0002, reward error: 0.7609:  83%|████████▎ | 6620/8000 [20:08<04:05,  5.61it/s]
Total episode reward: 93.3046003025. Finished in 72 steps.
[6764] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00379801], td-error: 299299.5000, model error: 0.0065, reward error: 53.2323:  85%|████████▍ | 6765/8000 [20:34<03:46,  5.45it/s] [6691] reward: -0.01, reward 100-step MA: 1.91, action: [-0.32090044], td-error: 0.0000, model error: 0.0000, reward error: 0.2007:  84%|████████▎ | 6692/8000 [20:21<03:55,  5.56it/s]   
Total episode reward: 93.7202089961. Finished in 75 steps.
[6835] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.01534021], td-error: 0.0000, model error: 0.0011, reward error: 0.6105:  85%|████████▌ | 6836/8000 [20:47<03:35,  5.39it/s]      [6766] reward: -0.07, reward 100-step MA: 1.91, action: [-0.86211205], td-error: 0.0000, model error: 0.0005, reward error: 0.0259:  85%|████████▍ | 6767/8000 [20:34<03:45,  5.46it/s]
Total episode reward: 93.3041615159. Finished in 71 steps.
[6940] reward: 99.90, reward 100-step MA: 0.91, action: [ 1.00993085], td-error: 19347.2500, model error: 0.0003, reward error: 155.5727:  87%|████████▋ | 6941/8000 [21:06<03:14,  5.45it/s] reward: -0.01, reward 100-step MA: 1.91, action: [ 0.25121629], td-error: 0.0000, model error: 0.0000, reward error: 0.0066:  85%|████████▌ | 6838/8000 [20:47<03:31,  5.49it/s]
Total episode reward: 91.0947722998. Finished in 105 steps.
[7045] reward: 99.90, reward 100-step MA: 0.90, action: [ 0.99838042], td-error: 0.0000, model error: 0.0011, reward error: 42.3520:  88%|████████▊ | 7046/8000 [21:26<02:55,  5.45it/s]      6942] reward: -0.02, reward 100-step MA: 0.91, action: [ 0.4990654], td-error: 0.0000, model error: 0.0001, reward error: 1.5038:  87%|████████▋ | 6943/8000 [21:06<03:12,  5.50it/s]    
Total episode reward: 90.2636806126. Finished in 105 steps.
[7154] reward: 99.89, reward 100-step MA: 0.91, action: [ 1.03434765], td-error: 28368.2500, model error: 0.0016, reward error: 3.0978:  89%|████████▉ | 7155/8000 [21:46<02:35,  5.42it/s]   7] reward: -0.01, reward 100-step MA: 0.91, action: [ 0.35400629], td-error: 0.0000, model error: 0.0001, reward error: 1.9994:  88%|████████▊ | 7048/8000 [21:26<02:52,  5.53it/s]   
Total episode reward: 90.5581873064. Finished in 109 steps.
[7231] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00807548], td-error: 28910.0000, model error: 0.0004, reward error: 28.0701:  90%|█████████ | 7232/8000 [22:00<02:21,  5.41it/s]7156] reward: -0.00, reward 100-step MA: 0.91, action: [ 0.16145456], td-error: 0.0000, model error: 0.0000, reward error: 3.0972:  89%|████████▉ | 7157/8000 [21:46<02:32,  5.52it/s]
Total episode reward: 92.2672517492. Finished in 77 steps.
[7339] reward: 99.90, reward 100-step MA: 0.92, action: [ 0.97473192], td-error: 83867.0000, model error: 0.0004, reward error: 59.1217:  92%|█████████▏| 7340/8000 [22:20<02:01,  5.41it/s] 7233] reward: -0.06, reward 100-step MA: 1.90, action: [ 0.77996969], td-error: 11150.1250, model error: 0.0001, reward error: 0.0113:  90%|█████████ | 7234/8000 [22:00<02:21,  5.42it/s]
Total episode reward: 90.9824652727. Finished in 108 steps.
[7415] reward: 99.90, reward 100-step MA: 1.91, action: [ 1.00547302], td-error: 45716.2500, model error: 0.0003, reward error: 4.2762:  93%|█████████▎| 7416/8000 [22:34<01:47,  5.42it/s] [7341] reward: -0.01, reward 100-step MA: 0.92, action: [ 0.24372602], td-error: 0.0000, model error: 0.0000, reward error: 0.7508:  92%|█████████▏| 7342/8000 [22:20<02:00,  5.44it/s]
Total episode reward: 93.4434528524. Finished in 76 steps.
[7522] reward: 99.90, reward 100-step MA: 0.92, action: [ 1.00021493], td-error: 65563.0000, model error: 0.0004, reward error: 77.1325:  94%|█████████▍| 7523/8000 [22:53<01:27,  5.46it/s] 417] reward: -0.03, reward 100-step MA: 1.91, action: [ 0.54996085], td-error: 0.0000, model error: 0.0001, reward error: 0.1301:  93%|█████████▎| 7418/8000 [22:34<01:48,  5.38it/s]
Total episode reward: 91.3302062892. Finished in 107 steps.
[7595] reward: 99.90, reward 100-step MA: 1.90, action: [ 1.00071621], td-error: 53430.5000, model error: 0.0005, reward error: 7.6382:  95%|█████████▍| 7596/8000 [23:07<01:14,  5.40it/s] [7524] reward: -0.03, reward 100-step MA: 0.92, action: [-0.51825792], td-error: 0.0000, model error: 0.0007, reward error: 0.0562:  94%|█████████▍| 7525/8000 [22:54<01:26,  5.52it/s]
Total episode reward: 92.8676053751. Finished in 73 steps.
[7668] reward: 99.90, reward 100-step MA: 1.90, action: [ 0.97726476], td-error: 14471.0000, model error: 0.0007, reward error: 0.0863:  96%|█████████▌| 7669/8000 [23:20<01:01,  5.42it/s] 7597] reward: -0.01, reward 100-step MA: 1.90, action: [-0.3307277], td-error: 0.0000, model error: 0.0001, reward error: 0.2370:  95%|█████████▍| 7598/8000 [23:07<01:13,  5.45it/s] 
Total episode reward: 92.749057066. Finished in 73 steps.
[7776] reward: 99.90, reward 100-step MA: 0.91, action: [ 1.01603138], td-error: 53074.0000, model error: 0.0010, reward error: 0.2535:  97%|█████████▋| 7777/8000 [23:40<00:41,  5.38it/s]   70] reward: -0.02, reward 100-step MA: 1.90, action: [ 0.40066624], td-error: 0.0000, model error: 0.0002, reward error: 0.6419:  96%|█████████▌| 7671/8000 [23:20<01:00,  5.40it/s]
Total episode reward: 90.8966227119. Finished in 108 steps.
[7849] reward: 99.90, reward 100-step MA: 1.91, action: [ 0.9964174], td-error: 66853.5000, model error: 0.0006, reward error: 41.5829:  98%|█████████▊| 7850/8000 [23:53<00:24,  6.07it/s] 7778] reward: -0.03, reward 100-step MA: 0.91, action: [-0.56303269], td-error: 6170.7500, model error: 0.0000, reward error: 0.2120:  97%|█████████▋| 7779/8000 [23:40<00:41,  5.38it/s]
Total episode reward: 93.4664236065. Finished in 73 steps.
[7960] reward: 99.90, reward 100-step MA: 0.91, action: [ 0.99759507], td-error: 67824.0000, model error: 0.0005, reward error: 27.7434: 100%|█████████▉| 7961/8000 [24:11<00:06,  5.88it/s]  51] reward: -0.01, reward 100-step MA: 1.91, action: [ 0.24447751], td-error: 0.0000, model error: 0.0000, reward error: 1.6037:  98%|█████████▊| 7852/8000 [23:53<00:24,  6.03it/s]
Total episode reward: 90.7968656336. Finished in 111 steps.
[7999] reward: -0.00, reward 100-step MA: 0.91, action: [ 0.14601552], td-error: 0.0000, model error: 0.0001, reward error: 0.0480: 100%|██████████| 8000/8000 [24:18<00:00,  6.17it/s]     [7962] reward: -0.00, reward 100-step MA: 0.92, action: [-0.17932951], td-error: 0.0000, model error: 0.0002, reward error: 2.0172: 100%|█████████▉| 7963/8000 [24:11<00:06,  5.89it/s]

In [15]:
# Save the experiment, then report the experiment's path.
# Order matters: exp.path is read only after exp.save() has run.
exp.save()
print("Experiment results saved in " + exp.path)


Experiment results saved in experiments/experiment_dmlac_MountainCarContinuous-v0_1495409405

In [16]:
# Plot the cumulative reward of the run. The trailing ';' stops IPython from
# echoing the call's return value (a list of matplotlib Line2D objects).
exp.plot_cumulative_reward();


Out[16]:
[<matplotlib.lines.Line2D at 0x7fd6870fcc50>]

In [17]:
# Plot the reward of the run. The trailing ';' stops IPython from echoing
# the call's return value (a list of matplotlib Line2D objects).
exp.plot_reward();


Out[17]:
[<matplotlib.lines.Line2D at 0x7fd6870fcc10>]

In [18]:
# Plot the TD error of the run. The trailing ';' stops IPython from echoing
# the call's return value (a list of matplotlib Line2D objects).
exp.plot_td_error();


Out[18]:
[<matplotlib.lines.Line2D at 0x7fd68754ffd0>]

In [19]:
# Plot the model error, skipping the first `learning_start` steps plus a
# margin of 10.
# NOTE(review): presumably the error is not meaningful before learning has
# started; the reason for the extra 10 steps is not stated — TODO confirm.
# The trailing ';' stops IPython from echoing the call's return value
# (a list of matplotlib Line2D objects).
exp.plot_model_error(skip_steps=settings["learning_start"]+10);


Out[19]:
[<matplotlib.lines.Line2D at 0x7fd6cc13abd0>]

In [20]:
# Plot the reward error, skipping the first `learning_start` steps plus a
# margin of 10.
# NOTE(review): presumably the error is not meaningful before learning has
# started; the reason for the extra 10 steps is not stated — TODO confirm.
# The trailing ';' stops IPython from echoing the call's return value
# (a list of matplotlib Line2D objects).
exp.plot_reward_error(skip_steps=settings["learning_start"]+10);


Out[20]:
[<matplotlib.lines.Line2D at 0x7fd687319850>]

In [21]:
# Plot the per-episode reward. The trailing ';' stops IPython from echoing
# the call's return value (a list of matplotlib Line2D objects).
exp.plot_episode_reward();


Out[21]:
[<matplotlib.lines.Line2D at 0x7fd6871def50>]

In [22]:
# Plot the per-episode duration. The trailing ';' stops IPython from echoing
# the call's return value (a list of matplotlib Line2D objects).
exp.plot_episode_duration();


Out[22]:
[<matplotlib.lines.Line2D at 0x7fd682863190>]

In [23]:
# Show the gif only when rendering was enabled in the settings dict.
# NOTE(review): presumably the frames are those captured while the experiment
# ran with rendering on — TODO confirm against Experiment.
if settings["render_environment"]:
    exp.display_frames_as_gif()




Once Loop Reflect

In [24]:
# NOTE(review): session teardown is intentionally left disabled here. The next
# cell calls exp.print_all_tf_variables(), which presumably still needs the
# live TensorFlow session — TODO confirm, then close the session at the very
# end of the notebook instead.
#session.close()

In [25]:
# Print every TensorFlow variable as a (name, value-array) tuple.
# The recorded output is very long: smaller arrays are printed in full and
# only the larger ones are abbreviated with "...".
exp.print_all_tf_variables()


(u'Actor_hidden_0_256_W:0', array([[  3.10449123e-01,  -8.30250323e-01,   9.42699611e-01,
          3.77484947e-01,   4.49805200e-01,  -6.36179745e-01,
          1.15343511e-01,  -6.44223213e-01,   3.69558215e-01,
         -4.33310539e-01,   2.73950487e-01,  -4.33384120e-01,
          4.97410417e-01,  -1.01236284e+00,  -1.06961274e+00,
          8.49069238e-01,   2.02725339e+00,  -6.94160819e-01,
          2.32811853e-01,   8.26460540e-01,  -4.79767799e-01,
          1.45014729e-02,  -9.26192403e-01,  -8.38630021e-01,
         -6.60115004e-01,  -3.48641157e-01,   5.71705520e-01,
         -7.83687830e-01,  -3.79593939e-01,   8.58444273e-01,
          9.39537525e-01,   6.41536236e-01,  -6.52774513e-01,
          2.23633885e+00,   1.42488807e-01,  -1.46013528e-01,
         -5.02676189e-01,   8.99448574e-01,   8.86535048e-02,
         -7.95273423e-01,  -9.53532979e-02,  -3.82327825e-01,
          2.59257585e-01,  -1.67254496e+00,   6.52680218e-01,
         -5.26656657e-02,  -4.46071893e-01,  -2.09293410e-01,
         -4.37916130e-01,   3.03620934e-01,   2.13052541e-01,
          4.79669631e-01,  -8.36777210e-01,  -9.73704457e-03,
          1.03103466e-01,  -1.75322998e+00,   1.38333046e+00,
         -2.07181647e-01,  -6.89368069e-01,   1.73870504e-01,
          4.70507473e-01,  -8.17631066e-01,   1.84106958e+00,
         -5.70603490e-01,   2.37111136e-01,   2.55336255e-01,
          3.67859781e-01,  -3.16298068e-01,   3.83851677e-02,
          6.24049783e-01,  -1.42319906e+00,   1.48202166e-01,
          1.49342418e-01,   3.70001853e-01,  -1.40107796e-01,
          7.73420036e-01,  -2.83770263e-01,  -6.50652051e-01,
          7.59938121e-01,   1.53196100e-02,  -5.87954879e-01,
         -1.48905382e-01,   1.48286867e+00,   8.16242814e-01,
          8.87747765e-01,   3.46297354e-01,  -3.37075919e-01,
         -5.45897782e-01,  -5.93766980e-02,  -5.12390137e-01,
          6.47911072e-01,  -2.22436875e-01,  -3.63669842e-01,
         -3.67496431e-01,  -6.38475299e-01,   1.77550599e-01,
         -3.23514581e-01,   4.80933428e-01,   1.07571018e+00,
          4.24357742e-01,  -1.38386548e+00,  -4.30373639e-01,
         -6.20666623e-01,   1.19751059e-01,   9.49941099e-01,
         -6.25970662e-01,  -1.03036189e+00,   1.52562499e+00,
          1.55439600e-01,   7.24767298e-02,  -1.33926535e+00,
          2.98655659e-01,  -3.40606183e-01,  -2.78280616e-01,
         -4.37145960e-03,   5.58452029e-03,   5.62004447e-01,
          1.87630832e+00,  -6.21275365e-01,  -1.05637050e+00,
          7.73365572e-02,   6.57335937e-01,  -5.08509755e-01,
          6.36917770e-01,  -7.48391807e-01,  -2.77751565e-01,
         -7.99452424e-01,   1.00904934e-01,   3.70112360e-01,
          1.07833219e+00,   3.68626118e-02,   5.10026515e-01,
          2.39380226e-01,  -3.70802402e-01,   2.12347761e-01,
         -6.37997016e-02,   5.26171267e-01,   2.18239158e-01,
          2.57296443e-01,  -2.41112486e-01,   7.29543149e-01,
          5.29834867e-01,   4.87247586e-01,  -7.24941850e-01,
          3.16994153e-02,  -2.11869389e-01,   6.27519906e-01,
         -1.00062989e-01,  -4.70543742e-01,  -1.21216893e+00,
          1.43831384e+00,   1.30912983e+00,   5.44346264e-03,
         -3.80148232e-01,   7.83552825e-01,   7.04818010e-01,
         -2.71088719e-01,   2.03143954e+00,   9.94931698e-01,
         -7.37361908e-02,   3.16743106e-01,   2.46313453e-01,
         -1.21103835e+00,  -4.39995974e-01,  -2.19813392e-01,
         -5.03776193e-01,  -1.03917408e+00,   2.66602397e-01,
          1.63534850e-01,  -2.88906425e-01,   4.15222853e-01,
         -5.21876931e-01,  -1.41412950e+00,  -1.00904989e+00,
          1.16229069e+00,  -3.75472575e-01,  -1.00550735e+00,
          5.24826825e-01,   7.95499206e-01,   8.22876841e-02,
         -1.49883759e+00,   5.00100136e-01,   8.02434504e-01,
         -3.61618638e-01,   4.64179307e-01,  -8.93782675e-01,
          4.99556363e-01,   4.02611606e-02,  -6.65707767e-01,
          1.79630792e+00,   6.41576409e-01,   1.83129162e-01,
          7.27805495e-02,   2.18539342e-01,   5.71363926e-01,
         -3.85818988e-01,   1.46925223e+00,   3.15571040e-01,
          4.09021050e-01,   5.53112805e-01,   1.91366240e-01,
         -4.69458789e-01,   5.08951604e-01,   4.69405174e-01,
          1.75137985e-02,  -1.14242423e+00,  -9.83097970e-01,
          1.95617899e-01,  -1.93564463e-02,  -3.60637099e-01,
         -4.15164679e-01,   3.03075373e-01,  -4.37166512e-01,
          4.38012034e-01,  -5.57291627e-01,   2.99865037e-01,
          2.60716826e-01,  -3.56903493e-01,   1.46739995e+00,
         -6.21088445e-01,  -1.20839633e-01,   1.53859556e+00,
         -6.54112399e-02,  -7.01172650e-02,   4.92060363e-01,
          8.78054649e-02,   5.93132913e-01,   1.13010705e-02,
         -7.77364001e-02,  -8.58477354e-01,   7.16403306e-01,
          2.42307007e-01,  -5.58180809e-01,  -5.83780706e-01,
         -6.03426814e-01,   1.38863981e+00,   4.60923314e-01,
         -3.80864948e-01,   7.19509125e-02,   1.67480302e+00,
         -1.98604837e-02,   1.42359093e-01,   5.32993853e-01,
          4.92212534e-01,  -4.46462750e-01,  -7.50971377e-01,
          2.68843174e-01,   1.63841414e+00,   7.35512733e-01,
          4.21109438e-01,  -4.05422330e-01,  -6.72171414e-01,
         -5.43919981e-01,   9.54858780e-01,   9.75027084e-01,
         -1.97575331e-01],
       [ -1.51592624e+00,  -7.49542892e-01,   6.11999154e-01,
         -6.62099957e-01,   4.31255698e-02,   2.01261306e+00,
          4.19409901e-01,   1.04195487e+00,   1.11761324e-01,
         -8.06235135e-01,  -6.83436632e-01,   7.24296272e-01,
         -1.94077921e+00,  -1.28406638e-04,  -2.92900592e-01,
         -1.31146979e+00,  -6.02529012e-02,  -6.41608192e-03,
          1.02498174e+00,   1.46659172e+00,   1.99112415e-01,
          3.84539485e-01,  -7.37488687e-01,   1.51807320e+00,
          1.42199552e+00,  -8.22976872e-05,  -4.01701301e-01,
         -1.98313877e-01,  -1.28167462e+00,   3.05877626e-02,
         -4.01048332e-01,  -1.07658875e+00,  -1.48820651e+00,
          3.08603734e-01,   5.99196434e-01,   3.73436421e-01,
          2.34998941e+00,  -7.82342494e-01,  -8.45778942e-01,
          1.60674846e+00,   4.42735367e-02,  -6.29005849e-01,
          1.16891468e+00,   5.23077369e-01,  -6.09504759e-01,
          1.11510026e+00,   4.07289058e-01,  -1.04694486e+00,
         -2.26057601e+00,   2.49116421e+00,   1.12088704e+00,
         -1.11294188e-01,   1.27260947e+00,  -2.55220056e-01,
         -1.21962345e+00,  -8.16752732e-01,   6.53686702e-01,
         -8.95889997e-01,   1.99894989e+00,   5.58066428e-01,
          1.79874980e+00,   1.56600475e+00,   6.05083227e-01,
          1.71152011e-01,   1.65872052e-01,   1.11960940e-01,
          8.23391140e-01,  -2.43463472e-01,  -2.16933906e-01,
          1.38933659e+00,  -1.81411669e-01,  -3.98462564e-01,
          9.68169212e-01,  -1.89817631e+00,  -2.45641410e-01,
         -4.93652999e-01,  -1.53802204e+00,   7.97797814e-02,
         -1.48996726e-01,  -6.66978657e-01,   1.65711805e-01,
          1.53682029e+00,   2.49216080e-01,  -7.59747863e-01,
          7.52050355e-02,  -4.96835113e-01,   1.65724512e-02,
         -1.52182436e+00,   9.23890352e-01,   3.93407911e-01,
         -3.28310549e-01,  -8.86061668e-01,  -1.05737221e+00,
          2.45505071e+00,  -8.00789237e-01,   8.94858181e-01,
         -4.11370128e-01,  -1.55171371e+00,   5.08352101e-01,
         -9.53387499e-01,   1.13606775e+00,  -4.20957923e-01,
          7.53366113e-01,  -6.95217907e-01,   3.22560906e-01,
         -1.41082987e-01,   7.30462611e-01,   7.67465830e-01,
          2.40835235e-01,  -9.43944871e-01,   6.22783124e-01,
         -1.37055740e-01,  -2.18182278e+00,   2.30365485e-01,
          2.86235541e-01,   6.37093306e-01,   1.03475785e+00,
          2.02112570e-01,  -4.49046403e-01,  -1.62055540e+00,
         -8.57153594e-01,   1.26699734e+00,   3.73446077e-01,
         -3.27348918e-01,  -1.80117440e+00,  -3.20480093e-02,
         -2.04608098e-01,  -1.44677329e+00,   3.05840671e-01,
          1.84375420e-01,  -8.95217896e-01,  -1.06735384e+00,
         -6.55076265e-01,  -2.20221162e-01,   2.48574570e-01,
         -5.02541482e-01,  -7.11997211e-01,  -1.40508279e-01,
          1.91649973e+00,  -1.48094499e+00,   7.51634479e-01,
          1.34761691e+00,   3.20442230e-01,   9.60936487e-01,
         -1.19837952e+00,  -2.66969061e+00,  -9.34391394e-02,
          5.27702093e-01,  -3.10215712e+00,   2.71029234e-01,
          1.81318209e-01,  -4.28294055e-02,  -1.34205449e+00,
         -8.57224107e-01,  -1.21778703e+00,  -5.36883250e-02,
         -5.59774756e-01,   1.79406977e+00,  -8.41634512e-01,
          3.72039348e-01,  -2.53998113e+00,  -1.50978851e+00,
          5.55885673e-01,   1.07909665e-01,   1.11268187e+00,
         -1.37666953e+00,   1.17521808e-01,  -2.60952115e-01,
          2.51032025e-01,   5.94413280e-01,   1.37556159e+00,
          6.56386435e-01,   7.19325006e-01,   3.61090809e-01,
          1.31752133e+00,   1.32535398e+00,  -1.08683014e+00,
         -1.87055796e-01,  -6.50638223e-01,  -1.71927667e+00,
          1.65605831e+00,   1.71931252e-01,  -3.02331932e-02,
         -4.15084898e-01,  -9.85926807e-01,   1.16541974e-01,
         -1.30422962e+00,   3.17402691e-01,   1.71063077e+00,
          4.54549193e-01,  -1.27560854e+00,  -4.55311209e-01,
         -1.72573507e+00,  -8.81045103e-01,   1.04730569e-01,
         -1.17039418e+00,   2.01439977e+00,   2.55889326e-01,
         -5.61110713e-02,  -1.44391215e+00,  -2.36331511e+00,
         -3.72604668e-01,   9.42972839e-01,   8.59401226e-01,
          1.25858140e+00,  -9.41172004e-01,  -3.36117372e-02,
         -1.06534338e+00,  -1.13696742e+00,  -1.04413354e+00,
         -7.89196908e-01,   2.00451064e+00,  -1.93853602e-01,
          3.13512892e-01,  -3.03817719e-01,  -8.04859698e-01,
         -1.95716560e+00,  -1.03446293e+00,  -1.07354790e-01,
          1.18491185e+00,  -4.11621422e-01,   6.83324993e-01,
          6.81254804e-01,  -1.74734890e-02,  -1.07716583e-01,
         -1.06789730e-01,   9.15462434e-01,  -1.34588110e+00,
          2.62658954e-01,  -7.88015723e-01,  -7.42494106e-01,
         -5.54709375e-01,   1.10739875e+00,  -1.33411026e+00,
          1.41661406e+00,   3.81778121e-01,  -6.90007150e-01,
         -8.57861638e-01,  -1.09430885e+00,  -2.36567706e-01,
         -2.35813904e+00,   1.05396545e+00,   3.87365222e-01,
         -1.81923354e+00,   6.20441258e-01,   8.90031457e-01,
         -1.56834197e+00,   2.74255037e-01,   1.44217223e-01,
         -6.31276071e-01,  -5.85976720e-01,   2.00079346e+00,
          8.08666527e-01,   2.62213588e-01,   1.09353757e+00,
          4.51944053e-01]], dtype=float32))
(u'Actor_hidden_0_256_b:0', array([ 0.07214922, -0.78546345, -1.00438392,  0.19715561, -1.48504877,
        0.06085639,  0.47990698,  0.56901807,  0.83275473, -1.90649211,
        1.03870821, -1.47584236,  0.72566807,  0.01081594,  0.27557841,
       -0.49429631, -2.17523766,  0.53676444, -0.2180678 ,  0.44604412,
        1.21879053,  1.32373595,  0.02005956,  1.85795105, -1.26615715,
        0.22453173, -0.30329981, -0.12747328,  0.90810901, -1.70135689,
        0.84353435, -0.87940687,  0.00431902, -0.34795865,  1.4399488 ,
       -1.0446744 , -0.20907804,  0.71967763, -1.26791942, -0.21812858,
        2.40533853,  0.3383005 ,  0.69449294, -1.30670977, -0.77633798,
       -0.07685676,  1.85321629,  1.10277557,  0.9368543 ,  0.49218932,
       -0.63768691, -0.21799085,  0.10742061,  0.83987677,  1.32157934,
       -2.18173099, -0.87835741, -0.222487  ,  2.11840892, -1.00852764,
        0.94938886,  0.12695195, -0.19481376, -0.35986832,  1.29220986,
        0.35643941,  1.19123113,  0.20394664,  0.46481544,  0.01852932,
        0.88732266,  0.58767533,  1.40355909,  0.31477895, -1.24114788,
        0.69218922,  0.05155312,  0.56589603, -0.38991308,  1.79492164,
        1.48644149,  1.13645566,  0.23842973,  0.47975585, -1.14313471,
        0.13942076, -0.84264344, -0.10642988,  1.09958386,  1.45138097,
        0.94575268, -1.70616555,  0.39221102, -0.03584361,  1.0872705 ,
       -0.37829629,  0.11538161, -0.44555256,  1.49062645,  0.27866712,
       -0.60631561, -0.56735408,  1.30270708,  0.06741523,  1.15089488,
       -1.49868309, -1.66847312, -1.29338384,  0.47143066,  0.62860668,
       -0.57687277,  1.31816578,  0.19437647,  0.31471309,  0.19436765,
        0.24029084, -0.36464721, -1.92371702, -0.01002108,  0.42618018,
       -0.53153521, -0.15312277, -2.19619513,  0.68469721,  1.09570742,
        0.38070014, -1.13714612,  0.06565613,  0.67372793, -1.36645854,
        0.73753226,  0.19567962, -1.14713776, -0.36725301,  0.65088421,
        0.22714865,  0.51036429, -2.15045571, -0.0275881 ,  0.09927447,
        2.11685181, -0.09816113, -0.89382857, -0.45263276, -1.07821953,
        0.61127269,  0.84506708,  0.56237006,  0.44608986, -1.04400241,
        0.97529835,  0.0106624 , -0.23265848,  0.3933807 ,  1.25996578,
       -1.2225945 ,  1.05040658,  0.03316668, -0.19714241, -0.57795733,
        0.34427527,  0.19605583, -0.84440023, -2.00090384,  1.30366349,
       -0.62255055,  0.53164619, -0.96031421,  0.86000836, -0.45682564,
        2.38795567,  0.60437953,  1.66458035, -0.23324808,  0.06611484,
       -0.0729653 , -0.19098383, -1.67569101, -0.75702471,  0.95057243,
        0.12666622,  0.08708459,  1.3382386 , -0.87650836,  0.01990085,
        0.75771427,  0.67315799,  0.70722902,  0.51008409, -0.70822579,
       -0.73659015, -0.35089159,  0.73571956, -0.27015549,  0.6171031 ,
        1.11198938,  0.06259083,  1.6938597 , -0.76959693,  0.41990301,
        0.23620869, -0.8559379 , -0.4823043 ,  1.17001283,  0.21909697,
       -0.66484755, -0.95528591,  0.85026169,  1.52215219, -0.88664263,
       -0.60047865,  0.22005011, -1.16632986, -2.91672182, -0.2011711 ,
       -0.07345106,  1.65519881,  1.08711147,  1.24205732, -0.14575133,
       -1.71870661,  0.27512389, -1.14330649,  0.59984338, -1.54440832,
        2.34930682,  1.4779644 ,  0.09797284,  0.93564773,  0.27105901,
       -0.51122558, -0.30610386, -1.14462459, -0.7583642 , -0.28796983,
        1.38812447, -0.96811396, -1.23090696,  1.5912894 , -1.09750903,
       -0.01966865,  1.42085934,  0.3411766 ,  0.39092228, -0.52724254,
        0.40238291,  0.15843235,  0.08066022, -1.75022304,  0.57436007,
        1.44835103, -0.43396157, -2.34229183, -0.64372277,  2.17161632,
        0.50309098], dtype=float32))
(u'Actor_hidden_1_128_W:0', array([[-0.05539365,  0.31018883,  0.00590796, ...,  0.36605144,
         0.07268371,  0.06087329],
       [ 0.01831991, -0.05476255,  0.08132376, ...,  0.00248242,
        -0.04855793, -0.02261225],
       [-0.03339755, -0.01112246, -0.01459376, ...,  0.00144716,
        -0.03286942, -0.04451962],
       ..., 
       [ 0.03937049,  0.04719676, -0.11512435, ..., -0.03109195,
         0.07422983, -0.21680765],
       [-0.02441513, -0.04510343, -0.11812051, ...,  0.02233244,
        -0.07023379, -0.05401293],
       [ 0.06101697, -0.02863118,  0.00343116, ..., -0.05506302,
        -0.00358154, -0.07913682]], dtype=float32))
(u'Actor_hidden_1_128_b:0', array([ 0.41621351, -0.42427388, -0.00422856, -0.30661556,  0.20730875,
       -0.33565906,  0.0327458 , -2.83684969, -1.36605275, -1.20085144,
        0.51125461, -0.02750342, -0.19149749, -1.07157135, -1.23377109,
       -1.43124604, -0.62777317, -0.18607704, -0.18397607, -1.25632215,
       -0.19910227,  0.49894005, -1.10552323,  1.42755497, -0.44990894,
       -0.87645924,  1.58189166,  1.37330508,  0.87148148,  0.78279603,
        1.8252095 , -0.39326489,  0.32325417,  1.30756783, -0.98303694,
       -1.708547  , -2.30173469,  0.39821398,  0.90269887,  0.74358702,
        1.43085849,  0.72567976,  0.16063614,  1.14393485, -0.84297389,
       -0.22155881, -1.13185596, -0.10502769, -0.84227687,  0.11755602,
       -0.87416226,  1.50072241, -0.5855366 ,  0.00696596, -1.09810233,
        1.41646147, -0.51249141, -0.1194506 ,  0.22569335,  1.50270474,
       -1.73161936,  0.68849361,  1.51078892,  1.1250205 , -0.06838015,
        1.72271013, -0.17295025, -0.98560554, -1.11576939,  0.76343685,
       -0.7187891 ,  0.7347036 , -0.07161925,  0.05494232, -1.80965519,
        0.94903499,  0.48048538, -0.42937776, -1.26070547, -1.16464114,
        1.17826951,  0.39140776, -2.340487  , -1.4403882 , -0.61025816,
       -1.19673908,  0.32465047,  0.51488942,  0.17014113,  1.82833135,
       -0.72343653, -0.3368305 , -0.24323779, -1.25199652, -0.01431947,
       -1.85499215,  0.16064359,  0.36242396, -0.01920447,  1.33663535,
        0.54003382, -0.01554809,  0.6696589 , -0.82424039,  1.59164667,
        0.45771787,  0.10717127, -0.79415166,  0.19502279,  1.42039669,
        1.42430317,  2.21474886,  1.01852524,  0.88905168,  0.24273457,
        0.32504621, -0.45245841, -0.39507934, -0.79255974, -0.88800091,
       -0.33243203,  0.35760346,  0.22480683,  0.69191027,  1.42765713,
        0.3327601 , -0.14378934, -0.29342738], dtype=float32))
(u'Actor_output_W:0', array([[ -6.97577447e-02],
       [ -1.28753796e-01],
       [ -8.56437534e-02],
       [ -1.13670349e-01],
       [ -7.30649680e-02],
       [ -5.08190282e-02],
       [  4.08321735e-04],
       [  6.30917847e-02],
       [ -4.91436273e-02],
       [ -6.82252645e-02],
       [  4.19773422e-02],
       [  4.76834327e-02],
       [ -1.59564981e-04],
       [ -9.82380807e-02],
       [ -3.56705822e-02],
       [  3.61164622e-02],
       [  4.56456169e-02],
       [ -9.02476460e-02],
       [ -4.33173068e-02],
       [  8.50057080e-02],
       [ -1.20780620e-04],
       [ -2.69063041e-02],
       [ -3.50558273e-02],
       [  6.90147504e-02],
       [ -3.58270190e-04],
       [ -3.41267228e-01],
       [ -5.98354127e-05],
       [ -3.80052514e-02],
       [  3.53333598e-06],
       [  1.46252532e-02],
       [  3.00613903e-02],
       [ -1.76609397e-01],
       [  2.42736414e-02],
       [  9.78254825e-02],
       [ -1.72467798e-01],
       [  9.08670500e-02],
       [  1.39877200e-01],
       [ -3.49021284e-05],
       [ -2.64384813e-04],
       [ -2.81390160e-01],
       [  8.17910284e-02],
       [  1.76551361e-02],
       [  2.46100109e-02],
       [  1.18735388e-01],
       [  2.93876454e-02],
       [  1.26786560e-01],
       [ -1.28468484e-01],
       [ -3.70533839e-02],
       [  6.46423995e-02],
       [  1.27451971e-01],
       [ -8.82922783e-02],
       [  1.31195327e-02],
       [ -6.31008968e-02],
       [  1.70819840e-04],
       [  1.36794880e-01],
       [  5.67814596e-02],
       [ -4.90942188e-02],
       [ -1.34425402e-01],
       [  4.86825816e-02],
       [  2.11245060e-01],
       [ -2.37583488e-01],
       [ -8.83534253e-02],
       [  2.06036042e-04],
       [  5.36053590e-02],
       [  9.22160521e-02],
       [ -5.18912449e-02],
       [ -9.65843871e-02],
       [ -5.60989007e-02],
       [  8.45848471e-02],
       [  2.62979828e-02],
       [  8.72070342e-02],
       [ -4.93118539e-02],
       [  3.43440734e-02],
       [  1.31267942e-02],
       [ -1.54403716e-01],
       [ -2.46749260e-02],
       [  8.10628235e-02],
       [  2.32190907e-01],
       [ -1.06252171e-01],
       [ -4.77600172e-02],
       [ -7.11765420e-03],
       [ -2.13555563e-02],
       [  6.98528485e-03],
       [  3.14043500e-02],
       [ -1.62781298e-01],
       [  6.96966350e-02],
       [ -1.05134875e-01],
       [  1.47987902e-01],
       [ -1.36359408e-01],
       [ -7.26194158e-02],
       [ -1.11592315e-01],
       [  2.86539588e-02],
       [  3.54902185e-02],
       [ -5.32247908e-02],
       [ -7.83200487e-02],
       [ -9.84988734e-02],
       [  5.74149974e-02],
       [  7.61928111e-02],
       [ -2.24248397e-05],
       [ -4.04784866e-02],
       [ -4.34139781e-02],
       [  6.09332062e-02],
       [  1.73147346e-04],
       [  1.62889495e-01],
       [  2.10282014e-04],
       [  1.06821597e-01],
       [  8.33736412e-05],
       [ -1.05954476e-01],
       [ -9.87091362e-02],
       [ -1.05999531e-02],
       [ -3.79648968e-03],
       [  1.45537360e-02],
       [ -2.07850360e-03],
       [  1.24130456e-04],
       [  6.42290190e-02],
       [  1.32774219e-01],
       [  2.72949748e-02],
       [ -2.61020381e-02],
       [  7.91526362e-02],
       [  3.14362049e-01],
       [  1.13056283e-02],
       [ -6.49069250e-02],
       [  6.73613101e-02],
       [  9.70361680e-02],
       [ -2.37057888e-04],
       [  8.47318247e-02],
       [ -6.46101683e-02],
       [  8.02126080e-02]], dtype=float32))
(u'Actor_output_b:0', array([-1.5074873], dtype=float32))
(u'Model_encoding_0_128_W:0', array([[ 0.49135983,  2.02494955, -0.07118495, -0.27534938,  0.30721453,
         0.17636266, -0.49619541, -0.08792351,  0.10178363,  0.78314883,
        -0.91556346, -1.19868577, -0.19927065, -0.11085794,  0.61602956,
        -0.35949484, -0.95257956,  0.39195541, -0.08059894, -0.55432796,
         0.89362639,  0.64887977, -1.25068986,  0.03805911, -0.35016337,
         0.52542078, -0.03747638,  0.61598259,  0.40774062, -0.28334215,
         0.71563554,  0.66326678,  0.3492575 , -0.0668598 , -0.61655718,
         0.83253944,  0.57401955, -0.25258547, -0.38941172, -0.55221194,
         0.29106081,  0.6213333 , -0.47420859, -0.50257218,  0.19875187,
        -0.11892499, -0.51087332, -0.67479306,  0.81231612,  0.27198434,
         0.31672797,  0.67245597, -0.46344236,  0.96861231,  0.92614084,
         1.03407919,  0.60647541,  0.45851532,  0.39213786,  0.56564456,
        -0.12938334, -0.38642472, -0.82032609,  1.19363606,  0.94817114,
         2.46784163,  0.41275638, -0.32228535,  0.47796771, -0.72206002,
        -0.69513828,  1.90852582,  1.54587805, -0.61194736, -0.12164801,
        -0.35599893, -0.77916074, -0.34515893, -1.05292928,  0.33743718,
        -0.2704635 , -0.08678628, -0.67061329,  0.05448839, -0.01044307,
         0.96258944,  1.2116791 , -1.05093348, -0.31048876, -0.04458882,
        -0.12112316, -1.01571238,  0.33702984,  0.22812141,  0.37785721,
        -0.69977874, -0.06910076,  0.91558307, -0.66318876,  0.6867215 ,
        -1.33307838,  0.37420198,  0.32248434, -0.55450475, -0.59395701,
         0.91960901,  0.55043966, -0.40009674, -0.82186019, -0.03930127,
         0.27047688, -0.93537116, -0.32989272, -1.6692028 , -0.86406302,
         0.42207715, -0.07763728, -1.26062214, -0.04499668, -0.68968737,
        -0.5175795 ,  0.83327585, -2.11631346,  0.37136847, -0.50874996,
        -0.63587552, -0.64337575, -0.09352721],
       [-0.21632023,  0.1125165 ,  0.23420781,  1.83606517, -1.45025277,
        -1.29272211, -0.37774491,  0.58434677, -1.15546095,  0.28359711,
        -1.11189711,  0.85457045, -0.45618621, -0.77412546, -0.10723072,
         0.71687603,  0.61412781, -0.69724941, -0.85425532, -1.13510406,
        -0.09079495,  0.02706167,  1.76157761, -0.07366968,  0.71523565,
         0.59958601,  0.05047134, -1.58071971, -0.03308054, -0.45993721,
        -0.28048748,  0.24857736,  0.00699796, -0.1329392 , -0.73971921,
        -0.57535619,  0.50437748,  1.75749731,  1.06307793,  1.46324718,
         0.08598807, -0.91971999,  0.29750925,  1.39606547,  0.318988  ,
         0.3715176 , -0.38965905, -0.91044903, -0.4318774 , -0.3383778 ,
        -0.69236249, -1.05120885,  0.77660662, -0.25631592, -0.81724107,
        -0.64480901,  1.18971515,  0.7911014 ,  0.01660153, -0.44471833,
        -1.12535787, -0.70930743, -1.02760065, -0.674559  ,  1.32394922,
         0.90676254, -0.612688  ,  0.71717602, -0.82950389,  0.28984794,
        -0.99202704,  0.31038326,  0.14484826, -0.53702027, -1.56600475,
        -1.36087525,  0.63819546,  0.06602522, -0.42591518, -0.50585854,
         0.86389244,  0.56902802, -0.03673161,  0.27511817, -1.08772337,
         0.06532306,  0.44089395, -0.18530779,  0.24156006, -0.20814173,
         0.50527835,  0.08122739,  0.78474092, -1.29352689,  0.46182624,
        -1.31591129,  0.8337943 , -0.71795368, -0.03038207,  1.67671323,
         0.48568043, -0.50704873,  1.53369415,  0.22075017,  0.18895218,
        -0.34750399, -0.09676594,  0.25954738,  0.719872  ,  0.04587094,
        -0.56216592,  0.15374111,  0.09782296,  0.58309281, -1.28402305,
         0.48515248,  0.71122861, -1.10889125,  0.966434  ,  0.49177578,
         1.02336919, -0.44606709, -0.05176596, -0.30860946, -1.74722171,
         0.18725264, -1.83788133,  1.20481563]], dtype=float32))
(u'Model_encoding_0_128_b:0', array([-1.22562611, -0.33430201,  0.56660998, -0.5525707 ,  0.21461278,
       -0.38869458, -0.7084769 , -0.51471567, -0.19620588,  1.40862381,
        2.76085377, -0.36599094, -1.69590938,  0.41390118,  1.1513598 ,
        0.07737302, -0.4868412 ,  0.12126729,  0.06855105, -0.45940802,
       -0.25970504,  1.31789851,  0.13902006,  0.40363348, -0.98300397,
       -0.05524224, -0.02177958, -0.0455171 ,  1.53305733, -0.49246743,
        0.01723312,  1.04680634,  0.45212916,  0.88116866, -0.56553251,
       -1.24109125,  0.12304506,  0.22383091, -0.2370618 , -0.39416251,
        0.16613878,  0.28619722, -0.17757654, -0.32183695, -0.7355907 ,
        0.16254038,  0.31476912, -0.67230785, -0.48046842,  0.01776509,
       -0.0345089 ,  0.27923366, -0.87087631, -1.83303523,  0.08506585,
        0.82296258,  1.04164279, -0.54111725, -1.05920422,  0.3554152 ,
       -0.35237351,  0.23148172, -1.52304363, -0.82314396, -0.82826853,
       -1.10440946,  0.18286546, -0.02940737,  0.15371189,  2.44524598,
       -0.82687199,  0.95948046, -0.60236788, -0.75206769,  0.25964358,
       -0.75476152, -0.57184225,  0.25722277,  0.01610618,  2.16282105,
       -0.35516462, -0.23121892,  1.4377532 , -0.73859173,  0.34548634,
        1.1461699 , -0.17399816, -1.06097472, -0.2220127 , -1.00130618,
        0.1209299 ,  0.16891114,  0.62066668, -0.02067512, -0.02212401,
       -1.045331  , -0.18486097,  0.71227813, -1.1863023 ,  1.27522814,
       -0.59231478,  1.16266322, -0.11060551,  0.08405732,  0.69582731,
        0.89554018,  1.15385723,  0.1005926 , -0.27613667, -0.02335725,
        0.02979112, -0.83570695, -0.35366011, -1.58089709, -1.13343942,
       -0.57095158,  0.01899027, -1.65908706,  1.6949755 , -0.51830995,
        0.56891799,  0.74736381, -1.5402317 ,  0.2252368 , -0.84600592,
        0.04043945,  0.89700454,  0.06171588], dtype=float32))
(u'Model_encoding_1_128_W:0', array([[ -6.74038887e-01,  -2.42632478e-01,  -2.70694375e-01,
         -1.36149275e+00,   5.77482907e-03,  -1.04053104e+00,
         -3.92826110e-01,  -4.07096976e-03,  -8.19363058e-01,
         -1.27280581e+00,   1.38489619e-01,  -9.31482613e-01,
          1.12993348e+00,   1.20693147e+00,  -5.84634364e-01,
         -8.62289667e-01,   2.32649699e-01,   3.25781941e-01,
          7.75916219e-01,  -2.88255960e-01,   8.87540653e-02,
         -9.49007273e-01,  -2.37658229e-02,   3.37574899e-01,
         -7.07585335e-01,  -2.81490460e-02,   7.28739649e-02,
          3.97290140e-01,  -7.06052482e-01,   2.36796169e-03,
         -2.28467181e-01,  -1.65826213e+00,   1.66265249e-01,
         -6.38918066e-03,  -1.31265140e+00,   1.45530403e-01,
          2.95098638e-03,  -2.50877190e+00,   2.89891571e-01,
         -6.64273620e-01,   1.98178923e+00,   5.19425492e-04,
          4.70856577e-01,   9.01547253e-01,   1.23962246e-01,
          3.15625742e-02,   1.02134213e-01,  -7.97283113e-01,
          9.13206756e-01,  -1.02256134e-01,  -4.44302947e-04,
         -4.93920386e-01,   3.41730922e-01,  -1.64214277e+00,
          5.32873213e-01,  -4.86214682e-02,   1.28787048e-02,
          1.44601691e+00,  -1.17424059e+00,  -4.24023718e-01,
          4.80913907e-01,   7.00498968e-02,   9.51930955e-02,
         -1.26806036e-01,   2.71445233e-02,   3.08014810e-01,
          6.79824471e-01,  -1.61754596e-03,   1.46525607e-01,
         -6.62060618e-01,   1.55752734e-03,   3.82252514e-01,
         -5.27539313e-01,  -5.47979653e-01,   1.35313082e+00,
         -2.21263781e-01,  -2.06067450e-02,   5.46923637e-01,
         -1.02278209e+00,   1.66535258e-01,  -2.16320087e-03,
          3.94856870e-01,  -8.32624197e-01,  -7.55917430e-01,
          3.65004279e-02,  -2.52826869e-01,   1.58670557e+00,
          9.77709949e-01,   4.27982658e-01,   7.81281531e-01,
          2.79629184e-03,  -1.36064231e+00,  -5.27529061e-01,
          5.01257360e-01,   6.09868839e-02,  -5.28920650e-01,
         -6.81317300e-02,   4.95825021e-04,  -3.27689230e-01,
          1.85540353e-03,  -1.67728925e+00,   9.72414738e-04,
          1.50900315e-02,   1.82245231e+00,  -1.21773372e-03,
         -2.47350079e-03,  -2.86400318e-03,  -5.90594560e-02,
         -5.30899167e-01,   2.91494787e-01,   1.34134024e-01,
          2.47480441e-02,   1.81875529e-03,  -9.21685755e-01,
         -6.85796261e-01,  -5.71711138e-02,  -9.09894884e-01,
         -2.09849283e-01,  -3.01176012e-02,   2.04622924e-01,
         -5.06705081e-04,  -3.02778751e-01,   3.94692868e-01,
         -3.75227392e-01,   1.57832336e-02,  -4.42885328e-03,
         -1.22451149e-02,   2.28623264e-02]], dtype=float32))
(u'Model_encoding_1_128_b:0', array([ -1.04700732e+00,  -6.88041270e-01,  -1.55732608e+00,
         5.89378119e-01,   3.10630491e-03,  -4.43711877e-01,
         1.56140089e+00,   1.69414186e+00,  -6.35807589e-02,
        -7.21657500e-02,  -3.94345224e-01,  -1.61118472e+00,
        -2.09853220e+00,  -1.74829650e+00,  -1.63650048e+00,
        -8.19590211e-01,   1.39029658e+00,  -1.08105826e+00,
         9.54563320e-01,  -2.45660558e-01,   1.58328325e-01,
        -8.75376523e-01,   2.66582966e-01,  -1.87432781e-01,
         7.21246660e-01,  -8.40381458e-02,   1.19447672e+00,
        -4.73600537e-01,   1.45873940e+00,   8.10104772e-04,
        -2.05817208e-01,  -1.52752376e+00,  -1.10937726e+00,
        -7.23525463e-03,  -4.19941872e-01,  -2.71613806e-01,
         3.62458290e-03,   1.70241439e+00,   1.91718888e+00,
         2.45949292e+00,  -2.03623682e-01,   2.47402489e-03,
        -3.13374639e-01,  -6.79554582e-01,  -8.46309721e-01,
         3.22608016e-02,   8.90047431e-01,  -7.44785726e-01,
        -1.72862804e+00,   2.01020598e-01,  -2.44903684e-01,
        -1.13609660e+00,  -1.55704689e+00,   1.66330171e+00,
         1.32214391e+00,   2.39878923e-01,  -1.12259543e+00,
         1.34257102e+00,  -9.98122990e-02,  -1.07564473e+00,
        -2.33261657e+00,   1.90955549e-01,   8.28436136e-01,
        -4.20752496e-01,   5.85470438e-01,  -6.93775341e-02,
        -4.93193954e-01,  -2.67040828e-04,  -5.44483900e-01,
        -2.58720726e-01,  -4.89026308e-04,  -1.60552517e-01,
        -1.49622440e-01,   1.54853606e+00,  -5.87168396e-01,
        -6.35198772e-01,   2.25095987e-01,  -5.94152927e-01,
         1.99102557e+00,   1.50338161e+00,  -8.26210380e-05,
        -4.27157700e-01,  -1.61091459e+00,  -1.39991927e+00,
         6.07062042e-01,   2.01317146e-01,   1.30773172e-01,
        -7.99341738e-01,   1.38938677e+00,   7.16052949e-01,
         3.36138974e-03,  -2.32544374e+00,   5.17115831e-01,
         4.42067355e-01,  -6.00564420e-01,   9.90183115e-01,
        -1.35041907e-01,   9.41010192e-04,   3.91412795e-01,
         2.52685742e-03,  -1.16461420e+00,  -2.11624909e-04,
        -1.02375209e+00,   1.72988999e+00,   1.85421528e-03,
        -1.02008716e-03,  -1.70282915e-03,  -6.33999705e-02,
         4.61248547e-01,  -1.92970470e-01,  -1.26391459e+00,
         2.91066375e-02,   8.59046530e-04,  -1.53414616e-02,
        -5.30382633e-01,   8.29643488e-01,  -1.87574756e+00,
        -5.31976819e-01,   1.51012003e-01,  -3.30351621e-01,
        -1.81397959e-03,  -1.44155920e+00,  -1.75739333e-01,
        -1.05852532e+00,  -3.65820862e-02,  -4.42587712e-04,
        -1.21881319e-02,   2.14551277e-02], dtype=float32))
(u'Model_hidden_0_128_W:0', array([[ 0.04134899, -0.00745308,  0.19191252, ..., -0.03076985,
         0.06962755,  0.02455787],
       [ 0.17484324,  0.19093306, -0.00090504, ...,  0.0118733 ,
        -0.02191254,  0.01320199],
       [-0.03764554, -0.06488659, -0.12693322, ..., -0.0074618 ,
         0.12626301, -0.07434369],
       ..., 
       [ 0.09168717,  0.00941177,  0.08883987, ...,  0.02868319,
         0.1278563 ,  0.0757451 ],
       [ 0.05085058, -0.02898657,  0.09589181, ...,  0.0288339 ,
         0.04577804,  0.062384  ],
       [-0.08041579,  0.00115247, -0.08465376, ..., -0.02095235,
         0.06077712, -0.13398924]], dtype=float32))
(u'Model_hidden_0_128_b:0', array([ 2.94011045,  0.48840877,  1.20935738, -0.04952197,  0.61876911,
       -0.89692044, -2.19924617, -0.38059813, -0.25143304,  2.32927966,
       -1.51613474,  0.17767864,  0.53061426,  0.10266392,  0.41245803,
        0.95076936, -0.01063254, -0.07422983, -0.18641002,  0.42750645,
       -0.58535057, -1.17651832,  0.64183885, -1.87774849,  0.2006838 ,
       -0.58841062,  0.67821777,  1.11258399,  0.05622358,  0.42529657,
       -0.08554213,  0.20395184, -0.28692102, -0.58176321, -1.93088222,
       -0.06197373,  0.70613688, -1.41740716, -1.01538813,  0.68636763,
        0.64668787, -0.60642546,  1.01247728, -0.06100448,  1.17552221,
       -1.02241528,  0.575122  , -1.75160897, -0.34619603,  0.5212999 ,
       -0.33406183,  0.10332689, -0.21816114,  1.57845283,  0.65752804,
       -0.91256273, -0.08170488,  0.74671906, -1.70069063, -0.88528955,
        1.2827822 , -1.5063051 ,  0.13392296,  1.05593956, -0.34578806,
        0.61251688, -1.06267202,  3.15583706,  0.30452988, -0.39726979,
       -0.26331845,  1.36659849,  0.69692278, -0.86336941,  0.92846817,
        0.30386725,  0.43000257, -0.48851153,  1.8062706 ,  0.71929646,
        0.87697315,  1.66088271, -0.26088172,  0.90015447, -0.0402176 ,
       -0.63924247,  1.85995018, -0.94283044,  1.27358222,  0.93061185,
        0.31474483,  1.24380469,  0.91937011,  2.78511739,  0.27723432,
        0.23309611, -3.15904236, -1.42684078, -1.97282159, -0.98741335,
       -1.01541901, -1.28754437, -0.53513163,  1.35619116,  1.37352538,
       -0.48469797, -2.18140173,  0.41599897, -1.54598212, -1.12542593,
       -0.23337622,  1.75334847, -0.32699126,  1.8831619 ,  0.62134242,
       -0.42180139, -0.4066692 ,  1.20960999,  0.39590666, -0.98209816,
       -0.15477884,  1.069754  , -0.22998421, -0.05455698,  0.17887689,
        0.24888602, -2.6922524 , -1.29348612], dtype=float32))
(u'Model_output_W:0', array([[  2.43938668e-03,   5.59420884e-02],
       [  2.25047506e-02,   6.99265674e-02],
       [  5.06469123e-02,  -2.16989145e-02],
       [ -4.25893348e-04,   3.53579489e-05],
       [  5.84393321e-03,   1.16009144e-02],
       [  6.32881820e-02,   1.29767090e-01],
       [ -1.18366070e-01,  -2.20542680e-02],
       [ -2.34604580e-04,  -1.57151357e-04],
       [  8.71375742e-05,  -1.96916502e-04],
       [  1.80203700e-04,  -1.79807648e-05],
       [  1.40943909e-02,   2.42589191e-01],
       [  2.91988108e-04,  -8.20583518e-05],
       [  7.61878788e-02,  -1.46540266e-03],
       [ -1.35733026e-05,   2.58095642e-05],
       [ -5.57361345e-04,  -7.53637141e-06],
       [  1.80406234e-04,   1.96784575e-04],
       [  9.19854665e-06,  -2.62969523e-04],
       [  1.72195584e-02,   5.08184731e-02],
       [ -1.12305582e-01,  -1.45441471e-02],
       [  1.66822283e-04,   6.87739011e-05],
       [ -1.56990960e-02,   2.14757144e-01],
       [  1.46314159e-01,  -3.55630182e-02],
       [ -1.53432484e-04,   1.78284565e-04],
       [  5.36936820e-02,   6.68627582e-03],
       [  1.66055434e-05,   2.15135820e-04],
       [  2.71515048e-04,   6.14340301e-04],
       [ -3.89422494e-05,   2.00354363e-04],
       [ -3.50878894e-04,   3.18130442e-05],
       [  1.17975833e-06,   1.29534266e-04],
       [  1.27743930e-04,   3.75035015e-04],
       [  7.62348473e-02,   9.76544246e-02],
       [  6.19688595e-04,  -4.33973546e-05],
       [ -3.33765000e-02,  -5.27138039e-02],
       [  2.78572291e-02,  -9.75120366e-02],
       [  2.91663222e-03,   1.48846926e-02],
       [  3.64067638e-03,  -6.82678400e-03],
       [ -2.45546061e-03,  -3.63216922e-02],
       [ -2.28272937e-03,   3.82876955e-03],
       [ -3.08104005e-04,  -1.64364174e-04],
       [  5.06984885e-04,  -6.68458460e-06],
       [  5.00665803e-04,   7.57608941e-05],
       [  2.32965942e-03,  -1.32583633e-01],
       [  8.81642918e-04,  -3.21783446e-04],
       [ -1.86123997e-01,  -9.44216773e-02],
       [  3.90325077e-02,   3.63103230e-03],
       [  1.23859264e-01,   1.74072664e-02],
       [ -9.47408727e-04,   5.07034711e-04],
       [  8.14564526e-03,   5.28435670e-02],
       [  1.69526801e-01,  -1.46105057e-02],
       [ -6.09674666e-04,   4.23932433e-05],
       [  6.88266446e-05,  -2.04836779e-05],
       [ -5.00626105e-04,   3.09266470e-04],
       [ -2.90150847e-03,  -8.17328170e-02],
       [  1.96454028e-04,   3.61040962e-04],
       [  6.80968456e-04,   5.10167592e-05],
       [  1.57216743e-01,  -4.29098420e-02],
       [  8.35290775e-02,  -4.17324901e-03],
       [ -9.30454880e-02,  -2.49522049e-02],
       [  2.25731498e-03,  -1.23999543e-01],
       [ -1.78310758e-04,  -3.03901870e-05],
       [ -1.63990961e-04,   2.82760651e-04],
       [ -6.85344264e-02,  -3.85562591e-02],
       [  3.30738781e-04,   1.73869834e-04],
       [ -3.48665693e-04,   7.29520980e-05],
       [  3.85797257e-03,   1.08272962e-01],
       [ -3.35904682e-04,  -1.16648211e-04],
       [ -1.25270143e-01,  -5.09848781e-02],
       [  1.66047795e-03,  -1.61046665e-02],
       [ -7.54345892e-05,   1.18037329e-04],
       [ -1.22764986e-02,  -3.29930224e-02],
       [ -9.49943089e-04,  -4.53803877e-05],
       [ -3.49162117e-04,   1.60069263e-04],
       [  2.85913935e-04,  -2.71881348e-04],
       [  3.20295594e-03,   4.36021984e-02],
       [ -1.26550794e-01,  -1.40310107e-02],
       [ -1.55971274e-02,   1.48384012e-02],
       [  6.21559157e-04,  -1.26169645e-04],
       [  1.98855196e-04,   4.03717204e-05],
       [  8.05129166e-05,  -7.99933114e-05],
       [  6.64928986e-04,  -3.53904936e-04],
       [  9.52460920e-04,   1.72379194e-04],
       [ -6.42936036e-04,   1.64607962e-04],
       [  1.11896894e-03,   2.28235585e-04],
       [  2.50327837e-04,  -1.57524846e-04],
       [  9.99083072e-02,   6.63149217e-03],
       [ -2.08789104e-04,  -1.70978659e-04],
       [  5.13488703e-06,   1.45796337e-04],
       [ -1.31466389e-01,  -2.27892660e-02],
       [  4.35177266e-04,   3.81944847e-05],
       [  3.05229391e-04,  -1.99911156e-05],
       [  2.73365411e-04,  -9.41671169e-06],
       [  5.50587662e-03,   4.43387181e-02],
       [  1.15521610e-01,  -8.18602517e-02],
       [  1.62955359e-04,   2.40612397e-04],
       [ -2.48767063e-03,  -3.12391911e-02],
       [  7.64931901e-04,  -8.77394923e-05],
       [ -1.86761320e-01,  -4.58077863e-02],
       [  3.01664546e-02,   4.22456339e-02],
       [  1.04119495e-01,  -5.33892214e-03],
       [  1.60375595e-04,  -6.70122899e-05],
       [  3.44490370e-04,  -1.51264452e-04],
       [  8.32473710e-02,  -9.10020024e-02],
       [ -4.95950356e-02,   2.77279988e-02],
       [  8.89947638e-02,   1.93049699e-01],
       [ -6.44757165e-05,   1.69104344e-04],
       [  5.60516119e-02,  -2.49898084e-03],
       [ -1.47032004e-03,   5.81870005e-02],
       [ -1.05640665e-03,  -3.17052414e-04],
       [ -8.27459097e-02,   3.59309465e-02],
       [  1.75866950e-02,   1.18414298e-01],
       [ -8.75813930e-06,  -4.87862009e-04],
       [  1.87063735e-04,  -7.66512458e-05],
       [ -8.50754678e-02,  -1.52024820e-01],
       [  1.74952816e-04,  -4.01858502e-04],
       [  4.23618418e-04,   4.84474993e-04],
       [  1.60368072e-04,   2.68101518e-04],
       [  3.10265605e-05,   2.09181773e-04],
       [ -5.96750069e-05,  -1.93356856e-04],
       [  3.38303633e-02,  -8.45330805e-02],
       [ -1.12265363e-01,  -5.32370433e-02],
       [  4.81329411e-02,  -2.80063711e-02],
       [  4.29912849e-04,  -2.51988404e-05],
       [ -6.47091493e-03,   3.51689430e-03],
       [ -2.69410620e-03,  -3.79675291e-02],
       [ -3.05774971e-03,  -3.61146927e-02],
       [ -3.34236938e-05,   2.72341014e-04],
       [  8.26384348e-04,  -2.88785668e-04],
       [  4.05267179e-02,   7.65464604e-02]], dtype=float32))
(u'Model_output_b:0', array([ 0.70250142,  0.06752016], dtype=float32))
(u'Reward_encoding_0_128_W:0', array([[ 2.5925715 ,  1.25417209, -1.26291072,  2.13559532, -1.71008313,
        -0.73086613, -2.26742673, -1.47413111,  0.86606514, -1.25549543,
         1.601524  , -1.72478485,  0.27136588,  2.33699608, -0.50039101,
         2.9648869 ,  1.62520564,  0.36673763, -0.73799253,  2.38690448,
        -2.15540218,  2.1482439 , -0.02013778,  2.15168858, -1.28366899,
        -1.77823496,  2.86371589, -1.03575015, -3.02416563,  1.91179526,
         1.90555871,  2.5816958 , -1.64370668, -0.74010253, -1.84913039,
         2.03911734,  2.35963535, -0.42140922,  1.33129215,  0.14507623,
        -2.35177493,  0.92792273,  2.40959978,  2.0891633 ,  1.40986061,
         3.34300256,  1.94283569,  1.59952044,  2.31795406,  1.56604159,
         0.10818011,  0.8710714 ,  3.18787289,  1.52789986,  3.09617043,
         1.90274012, -0.1540709 , -1.70567   ,  1.66119099,  4.22109318,
         1.54846942,  1.81622338, -1.39591491,  3.00529766, -1.53897858,
         0.98128682,  1.62096667,  1.83414936,  2.70502496,  2.16010785,
        -2.40697813, -1.98604143, -3.16975856, -1.96855569,  1.90068805,
         2.62304115, -1.32634223,  2.53233933, -2.22632217,  1.15141726,
        -1.28295195,  2.60403514,  3.07142067,  2.91946769,  2.0002594 ,
        -2.38268399, -1.70151794,  2.41391969,  0.17436765,  2.42355895,
        -0.02855963,  0.83681923,  1.06309462,  2.03629327, -0.30931363,
        -2.42069697, -2.31475067,  0.4983893 , -1.89014888,  3.51974654,
         0.46744192, -1.54447353, -2.25630784, -0.74816382, -2.40577126,
        -0.9814328 , -0.90963966,  2.86127448, -1.63319778,  2.23488379,
         2.748806  ,  2.16255116,  1.82304609,  1.42134655,  2.19166851,
         0.61479986,  1.77983391, -1.48165655, -2.29527354,  1.7839334 ,
        -2.09698129,  3.50800323,  2.11703658,  0.26864848, -2.7061677 ,
         2.59127712,  0.3949452 , -0.10404633],
       [ 0.02497612,  2.03607321,  1.5314616 ,  0.18856528,  0.91762394,
        -1.94377255,  0.4258534 ,  0.1502945 , -1.88310504, -2.57327414,
        -0.62320107, -2.44506001, -2.05389857, -1.2437166 , -2.76261091,
         0.78037077, -2.1091795 , -3.3455174 , -1.82638621,  0.29248336,
        -0.1620992 , -0.56813431,  3.32576275,  0.50758278, -0.5962193 ,
         0.38917756,  0.35429448,  1.57817101, -0.64520919, -0.77899939,
         1.38038206, -0.33662939, -0.6122281 ,  2.4006412 ,  1.5236572 ,
         0.30089423, -0.56876022, -1.63111091,  1.97148955,  1.59371889,
         0.20409633,  1.73359275,  2.04132771, -2.14762926, -0.46807739,
         0.16695313, -0.95788449, -0.8182081 , -1.13116527, -0.36138344,
        -2.58111453, -2.04943347, -0.46909854,  2.83260584, -0.49712887,
        -0.77164567,  2.96313548, -0.10525791, -1.57250118, -0.79337305,
         1.63283813,  1.86312926, -0.45089582, -0.8467229 , -1.10746789,
        -0.71921527, -1.52906907, -0.2855494 ,  0.24281535,  1.64793825,
         0.91479862,  1.98685539,  1.37949729,  0.87017977, -0.67403573,
         0.11254553, -1.40369666,  1.23851442,  2.33857656,  0.81937772,
        -1.16069937, -0.64599925,  1.15474403,  0.48577413,  0.88328451,
         0.22059409,  1.66202581,  1.39403403, -2.07324886,  0.54862881,
         2.29776764,  2.03299522, -3.15009379, -0.4524388 ,  0.80189514,
        -1.15655386,  0.75667381,  2.0122931 ,  1.0037626 ,  0.1207225 ,
         3.02643228, -3.57754064,  1.03528857, -2.93485045, -0.01440706,
         1.71545529,  1.68910813, -0.41797295, -1.67322934, -1.40101683,
         0.42059919, -0.80745572,  1.02882624,  0.73659497, -0.89047903,
         2.49539876, -2.99796247,  0.05994392,  0.67272764, -0.82278854,
         0.45440042, -0.54082972, -0.25799248, -2.23502922,  0.87125587,
         1.66745794,  2.1647954 ,  2.25947809]], dtype=float32))
(u'Reward_encoding_0_128_b:0', array([-0.96865088, -0.94214916,  0.72433484, -0.88120192,  0.88101649,
        1.41014421,  1.03241634,  0.69384986,  0.11696249,  2.0091908 ,
       -0.437576  ,  1.55131328,  0.92930162, -0.54778117,  1.77904975,
       -1.42272246,  0.1350178 ,  1.69373262,  0.93464112, -1.0553925 ,
        1.08413136, -0.36057922, -0.2688444 , -0.95787781,  0.8627308 ,
        0.75436062, -1.26439893,  0.58768952,  1.91256511, -0.52541548,
       -1.09062552, -0.9279058 ,  1.38239551,  0.52701181,  0.68503076,
       -0.91527313, -0.75198466,  0.73702061, -1.09431183, -0.14061858,
        1.18979537, -0.64636642, -1.69840896, -0.0527797 , -0.17902611,
       -1.3533051 ,  0.01562131, -0.46203947, -0.53930801, -0.51386601,
        0.74719924, -0.13414717, -1.12633514, -1.20644355, -0.69137406,
       -0.50292444, -0.98633581,  0.86323023, -0.09097352, -1.42593861,
       -0.98045987, -1.15958905,  0.8630178 , -0.91646588,  1.02707601,
        0.36428317, -0.093058  , -0.64619309, -0.90231776, -1.45714784,
        1.56196547,  1.37256563,  1.3342644 ,  0.90840971, -0.53832078,
       -1.0378387 ,  1.62434864, -1.31567526,  0.83767378, -0.68889123,
        0.937675  , -0.82967728, -1.57376778, -1.34173834, -1.03628552,
        1.17479491,  0.84152639, -1.33457923,  0.65665078, -1.14802575,
       -0.04882829, -0.55851394,  0.70935822, -0.66723561, -0.26025942,
        1.6728996 ,  1.02916193, -0.32531077,  0.79830086, -1.40541923,
       -0.60898817,  1.82987094,  0.9916361 ,  2.14866924,  1.2095325 ,
        0.30391014,  0.8101458 , -1.01301241,  1.69582486, -0.39743313,
       -0.7658025 , -0.60973853, -0.99169183, -1.55712497, -0.57767808,
       -0.46843076,  0.42338297,  0.70697761,  1.02828717, -0.46055272,
        0.97452474, -1.22998607, -0.77008367,  0.6896261 ,  2.29085374,
       -1.48301768, -0.4496088 , -0.75577509], dtype=float32))
(u'Reward_encoding_1_128_W:0', array([[  4.76499140e-01,  -2.20796287e-01,  -5.92657402e-02,
          1.15707986e-01,   1.33083987e+00,   4.72625405e-01,
         -6.42385900e-01,   2.59454536e+00,   5.37650764e-01,
          1.24659991e+00,  -1.64825916e-01,   1.32427490e+00,
         -9.07496154e-01,  -7.57403970e-01,   1.54092740e-02,
          3.64925474e-01,   1.14826310e+00,   4.60522100e-02,
          2.77157694e-01,  -1.33809209e+00,  -2.53070265e-01,
          5.84043741e-01,  -1.01582563e+00,   1.78370457e-02,
         -6.03367448e-01,   5.62290072e-01,   1.90181509e-01,
          4.71322417e-01,  -7.93119371e-01,   5.80193941e-03,
         -8.30242276e-01,  -3.29007432e-02,  -8.40659976e-01,
         -8.10154527e-02,  -1.68154645e+00,  -3.21893722e-01,
          1.11055303e+00,  -4.46316367e-03,   9.99986708e-01,
          2.55292118e-01,  -1.99664259e+00,   1.86688280e+00,
         -4.34519276e-02,  -8.31706524e-01,  -2.81001516e-02,
         -1.47625832e-02,   1.07165301e+00,  -3.56161380e+00,
          8.99102271e-01,   8.61781120e-01,  -6.52338088e-01,
         -6.22219801e-01,   1.10518456e+00,   4.98555034e-01,
          2.19628006e-01,   1.98764622e-01,  -1.17907357e+00,
          6.50045931e-01,   1.03201091e+00,   4.09647495e-01,
         -3.34217906e-01,   4.66810673e-01,  -8.71326864e-01,
         -4.56517283e-03,   3.57574858e-02,   3.15211326e-01,
          5.65982819e-01,  -6.02874339e-01,  -4.60490817e-03,
         -1.56810209e-02,   9.65321004e-01,  -5.29495589e-02,
          7.73043454e-01,  -1.01661766e-02,   1.16263878e+00,
          1.65993437e-01,   5.48864782e-01,  -3.44154648e-02,
          9.09340978e-01,   1.18474114e+00,   1.74554324e+00,
         -8.02219093e-01,  -3.19400616e-02,  -1.52818716e+00,
          1.58570981e+00,   2.35215902e-01,  -6.07176483e-01,
          6.49720132e-01,  -1.00122571e+00,   8.50680709e-01,
          2.23756745e-01,  -9.89084169e-02,   1.40152442e+00,
         -2.02584942e-03,   4.25671399e-01,  -8.74954015e-02,
         -1.15120697e+00,  -1.21545267e+00,  -8.71081829e-01,
         -9.50884819e-01,   7.77656198e-01,  -8.08137730e-02,
          4.27433848e-01,   1.52153850e+00,   1.09514451e+00,
         -1.98025715e+00,  -1.52405620e+00,   1.48977923e+00,
          6.11936040e-02,  -3.40559661e-01,   7.92392075e-01,
          1.50923932e+00,  -1.15533161e+00,  -1.17920232e+00,
          2.25546694e+00,   1.29590833e+00,  -3.34109247e-01,
         -1.36019080e-03,   3.16900164e-01,  -1.22074023e-01,
         -1.77369177e+00,   2.17186761e+00,  -2.88195219e-02,
          6.43402219e-01,   1.24598157e+00,  -9.93886530e-01,
         -8.51455867e-01,   4.61170524e-02]], dtype=float32))
(u'Reward_encoding_1_128_b:0', array([ -4.58243072e-01,   2.51842111e-01,  -2.56288853e-02,
        -1.28969371e+00,  -1.18382692e+00,  -4.36541617e-01,
         7.71907568e-01,  -1.77981913e+00,  -5.30279338e-01,
        -2.21542120e+00,   1.67489886e-01,  -1.18200481e+00,
         9.70692158e-01,   8.10533285e-01,  -2.32029427e-02,
        -1.78384170e-01,  -6.37198210e-01,  -3.13655287e-02,
        -2.70991236e-01,   1.32792985e+00,   2.57118791e-01,
        -4.95196879e-01,   1.01490629e+00,  -2.80046314e-02,
         5.87987661e-01,  -4.92751241e-01,  -1.44111082e-01,
        -4.40921962e-01,   7.96014965e-01,   2.03833673e-02,
         8.70526135e-01,  -7.23793358e-03,   8.66948128e-01,
         3.86499353e-02,   1.65978730e+00,   2.62024224e-01,
        -1.03353405e+00,   2.54650717e-03,  -8.22923422e-01,
         5.14212623e-02,   1.25511932e+00,  -1.18086839e+00,
         1.76622290e-02,   8.32203209e-01,   1.11220917e-02,
         1.45268934e-02,  -9.55925047e-01,   1.91847086e+00,
        -6.93966091e-01,  -6.57750785e-01,   7.05223024e-01,
         6.82390332e-01,  -9.63781953e-01,  -5.03551006e-01,
        -2.27872103e-01,  -2.05681682e-01,   1.17101562e+00,
        -5.15501678e-01,  -9.25370693e-01,  -3.54816437e-01,
         2.08925173e-01,  -3.80515277e-01,   9.82987225e-01,
        -3.59801054e-02,  -3.37884985e-02,  -3.15766782e-01,
        -5.67766547e-01,   6.29731238e-01,  -4.59050126e-02,
         6.40230775e-02,  -8.33289206e-01,   8.87009129e-03,
        -4.57916707e-01,  -7.90927035e-04,  -1.14628303e+00,
        -1.73917115e-01,  -3.05330604e-01,   7.65495002e-02,
        -8.95828605e-01,   6.57428980e-01,  -1.55417395e+00,
         8.17589700e-01,  -4.80616372e-03,   1.49933553e+00,
        -1.45443594e+00,  -2.41397575e-01,   3.33136916e-01,
        -5.64145446e-01,   9.96778369e-01,  -7.29937255e-01,
        -1.60592318e-01,  -1.19016366e-02,  -1.23587286e+00,
        -1.32581079e-02,  -4.25043941e-01,   1.12055242e-01,
         1.14641201e+00,   1.21384943e+00,   1.06668723e+00,
         1.21994019e+00,  -7.34939396e-01,  -1.54507207e-02,
        -2.66214788e-01,  -1.28366661e+00,  -9.72650886e-01,
         1.70621598e+00,   1.45167220e+00,  -1.01175821e+00,
        -1.91738196e-02,   3.33783954e-01,  -6.07802391e-01,
        -1.09877813e+00,   1.14534688e+00,   1.16847205e+00,
        -1.52562237e+00,  -1.12734687e+00,   3.47897589e-01,
        -3.97057794e-02,  -2.64860541e-01,   1.11990757e-01,
         1.75277734e+00,  -1.82037735e+00,  -1.31065296e-02,
        -8.23405385e-01,  -1.10197222e+00,   1.02221799e+00,
         1.09179115e+00,   4.23725061e-02], dtype=float32))
(u'Reward_hidden_0_128_W:0', array([[ 0.83349037,  1.21296501,  0.00448853, ...,  0.60700268,
         1.33233881, -0.72853488],
       [-0.94406837, -1.05251074,  0.23272924, ..., -1.81358683,
         0.64362174,  0.21876411],
       [-0.75013733, -1.33329594,  0.41544521, ..., -1.52459323,
        -0.99371088,  0.70020485],
       ..., 
       [-0.04463024, -1.58162189,  0.18725665, ..., -1.17390537,
        -2.04530907, -0.02644207],
       [-0.9411692 , -0.77221578,  0.13814911, ..., -0.49725607,
        -0.90558648,  0.09993076],
       [-0.24327403, -0.53596449, -0.14710134, ...,  0.25672656,
        -0.29638532, -0.15569364]], dtype=float32))
(u'Reward_hidden_0_128_b:0', array([ 0.53109473,  1.27395785, -0.44147992, -0.19489935,  0.85341161,
        0.22593337, -2.55004549, -1.053545  , -0.57657647,  0.26782432,
       -0.45197284,  0.20966217,  0.48517573, -0.66428554,  0.04668872,
        1.47446108,  1.29936957, -0.95409757,  0.17366379, -0.81762749,
        1.13768721,  0.36996326,  0.21051137,  1.88902354, -0.50809336,
        0.30507299,  0.79642922,  1.08679497,  0.13565618,  0.79041481,
       -1.87669086,  1.45954871,  1.54843187,  0.15835157, -0.17223334,
       -0.02407236,  0.08191435,  0.39530134, -0.21003407,  0.48097575,
       -1.38339186,  0.50607795, -1.56209612, -0.32403526,  0.34987643,
        0.4588398 ,  0.88887703,  1.26925516, -1.00344837, -0.4520852 ,
       -0.59809333,  0.81907117, -1.01680505, -0.88006854,  1.45424867,
       -0.62075526, -2.53952122,  1.58328462, -1.32298636, -0.76918042,
        0.87664121, -0.25924006, -0.19229053,  0.44402516,  1.27318716,
        0.53509444, -1.98459971,  0.55147839,  0.59365523, -0.84507781,
       -1.65072918, -0.67001617,  0.24511741,  1.151335  , -0.66665804,
        2.05744648,  0.46976298,  0.37840071,  1.76448905, -2.23876739,
       -1.86933517, -2.39598942, -1.48273695, -0.41978636, -0.6395117 ,
       -0.29402915,  0.59695673, -0.88752192,  1.21187508,  1.11320829,
        0.78605491,  1.24412513, -0.83418477,  0.20704107,  0.16547216,
       -0.14150801, -1.40515149, -1.81337595, -0.94795465,  0.16856134,
       -1.69233537, -0.97976893,  0.02120675, -1.24542999, -0.97938287,
        0.75847763,  1.61859965,  0.10126536,  0.29146612,  0.34719744,
        1.3495791 ,  0.52708435,  1.15654457,  1.39904749,  1.02346241,
       -1.07310879, -0.682863  , -0.21261062,  0.45747   , -0.61179149,
        0.06364289, -0.71747661, -0.63275796,  0.48339888,  1.09126174,
        0.2991468 , -0.59222394, -0.00430499], dtype=float32))
(u'Reward_output_W:0', array([[ 2.58039761],
       [-2.09393883],
       [-0.22107202],
       [-2.62154412],
       [-0.81057435],
       [ 5.40187168],
       [-2.52775955],
       [ 1.26408172],
       [ 1.51062751],
       [-0.2275949 ],
       [-2.92523336],
       [ 0.81043476],
       [-5.91224909],
       [-3.27910423],
       [ 1.7780056 ],
       [-0.13048044],
       [ 1.68519902],
       [-6.14645529],
       [ 2.41417789],
       [ 1.37945807],
       [ 2.94042277],
       [ 3.53901076],
       [-0.43131435],
       [ 0.23009761],
       [-2.31188965],
       [ 0.23374134],
       [-1.10170197],
       [ 1.97306156],
       [ 1.87295067],
       [ 1.74603128],
       [-0.7139923 ],
       [ 3.55514574],
       [ 6.45685816],
       [ 1.88998842],
       [ 1.73251212],
       [-0.52584445],
       [ 1.91720748],
       [-4.62714481],
       [-0.34051469],
       [-3.77927375],
       [-0.36231095],
       [-9.90133762],
       [-0.08712806],
       [-0.69359946],
       [ 2.91437721],
       [-1.44189525],
       [-0.34605822],
       [-0.35763028],
       [-1.27317631],
       [ 0.98678094],
       [ 1.16916788],
       [ 1.92442763],
       [ 1.76520586],
       [ 1.76317167],
       [-0.50724256],
       [ 1.25427091],
       [ 4.17573071],
       [ 1.40794384],
       [-5.04222775],
       [ 2.46109891],
       [ 3.44191289],
       [ 1.60664964],
       [-2.95860052],
       [-0.81211954],
       [ 2.52723384],
       [ 0.92661214],
       [ 1.31705165],
       [-4.8199873 ],
       [ 2.11865544],
       [-0.76884735],
       [-1.22868407],
       [-3.22862983],
       [-6.01477194],
       [ 1.73178911],
       [ 1.71447945],
       [ 1.0948422 ],
       [ 1.78660893],
       [ 0.92327875],
       [-1.3133651 ],
       [-3.1142261 ],
       [-1.49831867],
       [-0.32971111],
       [-1.35873163],
       [-2.32107878],
       [ 0.93830973],
       [ 2.79281139],
       [-1.62387419],
       [-0.34731704],
       [ 1.12364995],
       [-5.87035179],
       [ 0.98888129],
       [ 1.68647969],
       [ 2.20077085],
       [ 3.10455322],
       [-0.84159404],
       [-0.64523643],
       [ 6.4389267 ],
       [ 0.7573837 ],
       [-5.71666718],
       [ 1.76092994],
       [ 1.1271162 ],
       [-0.33683059],
       [-0.47232112],
       [-0.23483577],
       [-1.83075368],
       [ 3.23180628],
       [ 1.81414735],
       [ 1.70084441],
       [ 2.9925909 ],
       [-0.43085778],
       [-0.36817575],
       [-1.48089552],
       [ 0.99976552],
       [-2.04348421],
       [-2.79764915],
       [ 1.34251904],
       [-2.8227613 ],
       [-0.42931676],
       [ 1.58135033],
       [-2.12122178],
       [ 0.22943448],
       [-2.66062498],
       [-2.12325072],
       [ 1.24527967],
       [ 1.61775446],
       [-1.99071169],
       [-6.56041765],
       [-0.74085009]], dtype=float32))
(u'Reward_output_b:0', array([-1.30134094], dtype=float32))
(u'Value_hidden_0_256_W:0', array([[  1.58855450e+00,  -4.31995535e+00,   1.62565875e+00,
          1.61038792e+00,   5.82926631e-01,   2.15514889e+01,
         -5.04894400e+00,  -8.83786011e+00,   1.52040923e+00,
          5.96569955e-01,  -4.33716822e+00,   6.42487764e-01,
          1.58227074e+00,  -5.95761013e+00,  -7.78372955e+00,
          7.57033682e+00,  -3.90789104e+00,  -5.26107407e+00,
          1.59770036e+00,   5.35800219e-01,   7.18689442e+00,
         -1.37224770e+00,   2.91904539e-01,   1.59479828e+01,
          1.68982458e+00,   1.45033627e+01,   3.22896051e+00,
         -4.56743526e+00,  -9.54135227e+00,   3.07688785e+00,
          1.04872837e+01,   1.74967899e+01,   1.60816169e+00,
          1.55734987e+01,   1.84725990e+01,  -7.47812939e+00,
          1.60457659e+00,  -8.24539566e+00,  -4.51991367e+00,
         -8.10076809e+00,  -3.96850324e+00,  -9.61819935e+00,
         -1.21516571e+01,  -5.63216066e+00,   1.52078462e+00,
          1.58256662e+00,   1.54521122e+01,   1.54699886e+00,
         -9.51437569e+00,  -4.15675449e+00,   1.92958653e+00,
          1.54420292e+00,  -7.58803368e+00,  -1.12632875e+01,
          6.52852535e-01,   1.58918190e+00,  -8.44942570e+00,
         -3.88802361e+00,  -7.10462189e+00,   6.35077238e+00,
          1.18670082e+01,   6.63772762e-01,  -9.36955929e+00,
         -5.46086359e+00,  -1.96305656e+00,  -2.25835013e+00,
         -7.11876202e+00,   1.91773930e+01,   1.57413054e+01,
          1.27666559e+01,  -7.53592682e+00,  -7.67729378e+00,
         -1.57136548e+00,   4.36435789e-01,   1.53513491e+00,
          1.62366152e+00,  -6.13782358e+00,   1.60135531e+00,
         -4.77598935e-01,  -5.63575220e+00,  -8.01776981e+00,
          5.79482436e-01,   1.61417675e+00,  -5.87629616e-01,
         -7.35989952e+00,   1.13654652e+01,  -7.46784258e+00,
          1.57638931e+00,  -3.57466435e+00,   1.02233493e+00,
         -6.41943502e+00,   2.09273791e+00,  -6.52351236e+00,
          1.50494826e+00,   8.56918246e-02,   1.63634014e+00,
          1.60724688e+00,   8.35537255e-01,  -5.46696234e+00,
          3.48573804e+00,   6.52499080e-01,  -5.08095789e+00,
          1.57052815e+00,   9.29087043e-01,  -4.66365767e+00,
         -7.51616240e-01,  -9.44585991e+00,  -8.03517342e+00,
          2.11281509e+01,   3.34725499e-01,  -6.31357431e+00,
          1.03965015e+01,  -7.15171289e+00,   5.80116808e-01,
          1.55601561e+00,   1.69726408e+00,   1.55673885e+00,
          1.24038992e+01,   1.61313725e+00,  -6.90878105e+00,
         -1.20549707e+01,  -1.17716064e+01,  -8.22774982e+00,
         -7.82575035e+00,  -4.38699961e+00,  -4.56822157e+00,
         -4.54017496e+00,  -6.40132427e+00,  -1.40845194e+01,
         -4.78392422e-01,  -6.94685078e+00,   1.32337141e+01,
         -6.97252178e+00,  -4.30403471e+00,  -7.30491114e+00,
         -2.00038815e+00,   4.63165402e-01,   1.50174308e+00,
         -3.96858764e+00,  -4.56358719e+00,  -8.52467346e+00,
          1.58378053e+00,   2.81396359e-01,   6.04772389e-01,
         -4.06528187e+00,  -4.34716129e+00,   2.12942886e+01,
          1.56881523e+00,   1.59353006e+00,  -5.47932577e+00,
          6.34840310e-01,   1.06157579e+01,  -4.68873692e+00,
          5.94484389e-01,  -6.02791452e+00,  -5.09079027e+00,
         -6.88960314e+00,  -6.82970285e+00,   1.58436716e+00,
          1.54521644e+00,   1.59678066e+00,   2.00998116e+01,
         -5.05495882e+00,   6.50235116e-01,   1.06944885e+01,
          1.27194538e+01,   5.47508538e-01,   1.57479119e+00,
          1.80495663e+01,   2.11008663e+01,   1.26314468e+01,
         -5.30979729e+00,  -5.14125872e+00,   8.61612678e-01,
         -7.76932383e+00,  -5.10980511e+00,   1.75093441e+01,
         -6.17395258e+00,  -4.84902477e+00,  -6.44792557e+00,
         -9.14117813e+00,  -6.56304169e+00,  -3.76356292e+00,
          1.66115558e+00,   4.60527152e-01,  -4.45146227e+00,
          1.50267854e-02,   3.32092106e-01,   1.28787339e+00,
          1.53333163e+00,   1.64174354e+00,   1.59147060e+00,
          1.12016191e+01,   1.54059684e+00,  -5.14140081e+00,
         -9.39861393e+00,   3.42174697e+00,   1.58446991e+00,
          2.05528297e+01,  -8.96297395e-01,  -1.50519073e-01,
          1.48532724e+00,   6.29266858e-01,   1.52543652e+00,
         -6.26482677e+00,   9.83798790e+00,  -1.38165474e+01,
         -6.68937969e+00,   8.84234667e-01,   6.82043493e-01,
         -1.71237671e+00,  -6.44854212e+00,   5.90524316e-01,
          1.59155452e+00,  -6.28804398e+00,  -5.19480276e+00,
         -5.51800203e+00,  -2.24002552e+00,   1.79862154e+00,
          6.03401542e-01,  -6.29032707e+00,   1.52731931e+00,
         -6.05302954e+00,   1.59514129e+00,  -3.56139708e+00,
         -9.78015614e+00,   4.72735596e+00,   8.03247166e+00,
         -3.80049992e+00,  -4.60168982e+00,   7.81006932e-01,
         -6.84611320e+00,   1.54123843e+00,  -3.60955930e+00,
          1.49527800e+00,   5.86860836e-01,  -4.36876267e-02,
         -4.04340744e+00,  -4.89320368e-01,  -9.23300362e+00,
          2.12548714e+01,   2.82948828e+00,   5.92207789e-01,
         -9.61939716e+00,  -4.98700380e+00,   5.16022682e-01,
          1.24706459e+01,   9.54657614e-01,  -6.47740889e+00,
          1.55436206e+00,   1.44689465e+01,   1.51932251e+00,
          1.89697304e+01,  -5.32397516e-02,  -5.80393362e+00,
         -5.61003113e+00],
       [ -2.53003192e+00,   8.85561275e+00,  -2.39549851e+00,
         -2.32846856e+00,   1.09409447e+01,   7.41309309e+00,
          7.30201340e+00,   7.18071938e+00,  -2.21604347e+00,
         -6.98181772e+00,   4.77425528e+00,  -7.21540022e+00,
         -2.31905389e+00,   6.65398264e+00,   8.36586094e+00,
          9.60550308e+00,  -3.66319351e+01,   8.78736496e+00,
         -2.35704470e+00,  -8.14212608e+00,   8.39083576e+00,
         -1.09551525e+01,  -9.57750511e+00,   1.00704412e+01,
          1.04420662e+01,   1.01029634e+01,   9.23109722e+00,
          8.94228363e+00,   2.39754367e+00,   1.00816164e+01,
          1.03040781e+01,   1.07663774e+01,  -2.39134359e+00,
          1.03347092e+01,   1.03375998e+01,  -2.89300976e+01,
         -2.33974099e+00,  -2.04396935e+01,   8.83079815e+00,
          4.88398361e+00,   9.15108204e+00,  -1.64957695e+01,
         -1.55488987e+01,   7.86191130e+00,  -2.88494802e+00,
         -2.33244658e+00,   1.02982197e+01,  -2.28325081e+00,
         -1.67983265e+01,   8.41846085e+00,   9.44842148e+00,
         -2.25091434e+00,   8.17637539e+00,   1.64591360e+00,
         -6.84388351e+00,  -2.31938553e+00,  -2.63993225e+01,
          7.42392445e+00,  -3.15354404e+01,   9.53548336e+00,
          1.03751183e+01,  -7.44154978e+00,  -6.98537409e-01,
          8.05226040e+00,  -8.32350445e+00,  -1.97176266e+01,
          8.05860710e+00,   9.62928104e+00,   1.05513630e+01,
          1.03376589e+01,   6.82469273e+00,   7.69945574e+00,
         -7.75090504e+00,   9.55719090e+00,  -2.23841190e+00,
          9.26860809e+00,   8.42105103e+00,  -2.46904516e+00,
         -3.14542160e+01,   8.00324535e+00,   6.79045248e+00,
         -7.17443466e+00,  -2.44389176e+00,  -7.26350021e+00,
         -3.05066242e+01,   1.01345358e+01,   7.71096945e+00,
         -2.37017798e+00,   8.99473667e+00,  -6.10434341e+00,
          7.34316015e+00,   1.11973896e+01,   8.56594372e+00,
         -2.20067358e+00,  -1.41590061e+01,  -2.56962395e+00,
         -2.88362384e+00,  -6.91472006e+00,   7.70980310e+00,
          9.91966057e+00,  -6.70511961e+00,  -3.50559731e+01,
         -2.32102513e+00,  -6.44295263e+00,   8.10798454e+00,
         -7.31347799e+00,  -1.14895105e+00,   7.69565916e+00,
          6.15086603e+00,  -9.20790195e+00,   8.34171963e+00,
          1.09311686e+01,  -1.19396791e+01,  -7.24750948e+00,
         -2.27129745e+00,  -2.86143541e+00,  -2.28935766e+00,
          1.07053108e+01,  -2.40686941e+00,   6.59183979e+00,
         -1.61688862e+01,   1.42805055e-01,   6.17059040e+00,
         -2.49338913e+01,   7.38852882e+00,   8.26123142e+00,
          9.61674786e+00,   7.67311192e+00,  -1.94052811e+01,
         -7.74631643e+00,   7.80756235e+00,   1.06335449e+01,
          7.14223528e+00,   7.92863894e+00,   7.79319096e+00,
         -1.52780609e+01,  -7.01078892e+00,   1.13325663e+01,
          8.69751263e+00,   7.16693926e+00,  -2.37708092e+01,
         -2.32609463e+00,  -9.67922306e+00,  -7.03246212e+00,
          9.29314518e+00,   7.73265886e+00,   6.49411869e+00,
         -2.32041025e+00,  -2.56842732e+00,   7.75564766e+00,
         -6.82216740e+00,   1.10065174e+01,   7.89880705e+00,
         -6.76334906e+00,   8.70404911e+00,   8.09298611e+00,
          8.08876514e+00,   6.81925583e+00,  -2.54457760e+00,
         -2.26071692e+00,  -2.37655854e+00,   9.41113758e+00,
          7.31008148e+00,  -6.98374557e+00,   9.68274784e+00,
          1.00587654e+01,  -6.85194635e+00,  -2.32674932e+00,
          9.65944958e+00,   8.53087616e+00,   1.09403334e+01,
          8.41519833e+00,   8.50492954e+00,  -6.85334587e+00,
          7.89263964e+00,   8.02979469e+00,   9.61525917e+00,
          7.89022017e+00,  -3.69611282e+01,   8.45080471e+00,
          3.76047254e+00,   8.80344868e+00,  -3.71863708e+01,
         -2.52593470e+00,  -7.76544523e+00,   7.15636253e+00,
          9.77169800e+00,   1.00979824e+01,  -4.96316195e+00,
         -2.24925590e+00,  -2.42273283e+00,  -2.32981348e+00,
          1.09897442e+01,  -2.23781013e+00,   9.09359264e+00,
         -2.26223543e-01,   1.09290667e+01,  -2.33900023e+00,
          8.92096996e+00,   1.02710476e+01,  -1.13413572e+01,
         -2.19393063e+00,  -8.19469547e+00,  -2.24113488e+00,
          6.97500467e+00,   9.79874229e+00,  -2.01859245e+01,
         -3.11669159e+01,   1.06956215e+01,  -7.09741020e+00,
          1.03773127e+01,   6.20991325e+00,  -6.38596296e+00,
         -2.44569826e+00,   7.98239946e+00,   8.06593895e+00,
          8.08996773e+00,  -1.06567097e+01,  -2.82093048e+00,
         -6.82429457e+00,   8.87247944e+00,  -2.21632695e+00,
          7.37717676e+00,  -2.33633327e+00,   8.73017883e+00,
         -2.54469061e+00,   9.08958340e+00,   1.00473843e+01,
          8.08142662e+00,   7.81882954e+00,  -6.71199751e+00,
          8.37143707e+00,  -2.28332496e+00,   8.59674549e+00,
         -2.19673800e+00,  -7.39313078e+00,   1.09586802e+01,
         -9.93175888e+00,  -8.80949688e+00,   3.22494650e+00,
          7.31202364e+00,   9.86805248e+00,  -7.79269314e+00,
         -1.30298290e+01,   8.35633659e+00,  -7.67892790e+00,
          1.07105618e+01,  -6.09063196e+00,   8.02931309e+00,
         -2.35943580e+00,   1.05868444e+01,  -2.23296118e+00,
          9.23864365e+00,   9.58707714e+00,   7.44452095e+00,
         -1.14711943e+01]], dtype=float32))
(u'Value_hidden_0_256_b:0', array([ -6.46993589e+00,   6.09683084e+00,  -9.81201935e+00,
        -1.20631304e+01,   5.10508919e+00,   1.52809703e+00,
         7.39436388e+00,   4.20929861e+00,  -1.12523947e+01,
         2.69452405e+00,   1.58330650e+01,   2.92247319e+00,
        -1.06632109e+01,   6.14385462e+00,   4.88588810e+00,
         1.54388130e+00,   4.10624981e+00,   1.41374159e+00,
        -9.85144329e+00,   3.44745803e+00,   4.56238937e+00,
         5.14863777e+00,   4.18381882e+00,   2.80656457e+00,
         1.99322093e+00,   3.07957006e+00,   3.97121763e+00,
         4.27165508e+00,  -5.44006526e-01,   3.33263922e+00,
         2.48777461e+00,   2.26132298e+00,  -9.23217869e+00,
         3.05101466e+00,   3.04331446e+00,   2.28277588e+00,
        -1.08950624e+01,   1.32179713e+00,   3.50235152e+00,
         6.78618002e+00,   4.53722525e+00,   9.39071083e+00,
         1.07431622e+01,   6.94016314e+00,  -3.60793567e+00,
        -9.99567604e+00,   2.93978000e+00,  -9.98418522e+00,
         1.14638412e+00,   3.00926566e+00,   3.37240744e+00,
        -1.11792936e+01,   3.92959642e+00,   8.94151255e-02,
         2.67251849e+00,  -1.13576822e+01,   1.75200367e+00,
         8.30812454e+00,   3.42434740e+00,   5.01039839e+00,
         3.30549884e+00,   3.10793900e+00,  -3.75198796e-02,
         4.53726673e+00,   3.76743007e+00,   9.45179558e+00,
         6.25736046e+00,   2.69455457e+00,   2.91050267e+00,
         2.97439075e+00,   6.21127415e+00,   4.10361004e+00,
         3.36644459e+00,   1.41982222e+00,  -1.04660206e+01,
         5.15800953e+00,   3.04828525e+00,  -7.57705688e+00,
         1.29447794e+01,   4.07517624e+00,   7.63801765e+00,
         2.79181170e+00,  -8.20702457e+00,   3.02726054e+00,
         2.29492140e+00,   3.41110611e+00,   2.64843249e+00,
        -8.68097591e+00,   3.47253680e+00,   2.88636136e+00,
         4.89908266e+00,   3.20582271e+00,   4.53471422e+00,
        -1.14519463e+01,   6.35197735e+00,  -6.75388288e+00,
        -4.23381090e+00,   3.00166130e+00,   5.80706978e+00,
         4.16706753e+00,   2.61737156e+00,   3.18638134e+00,
        -9.88213158e+00,   2.71350169e+00,   5.61807537e+00,
         3.06802464e+00,   3.60509893e-03,   3.24506783e+00,
         1.40661335e+00,   4.01871872e+00,   4.20545197e+00,
         2.39045691e+00,   6.89080715e+00,   2.85443997e+00,
        -1.22361984e+01,  -5.04239798e+00,  -1.09591942e+01,
         2.85177827e+00,  -8.99491215e+00,   6.67115259e+00,
         1.07297144e+01,   4.28836383e-02,   5.22877216e+00,
         3.09228182e+00,   7.95855761e+00,   3.49869156e+00,
         3.98066139e+00,   6.19654608e+00,   1.22909288e+01,
         3.29468155e+00,   6.84247553e-01,   3.59504700e+00,
         6.01678181e+00,   8.41423512e+00,   5.38693237e+00,
         7.39986610e+00,   2.71949458e+00,   3.14329648e+00,
         4.21695375e+00,   8.46883011e+00,   2.43896151e+00,
        -9.90820026e+00,   4.25058222e+00,   2.73047137e+00,
         4.09942818e+00,   6.69231415e+00,   1.44775426e+00,
        -9.69124699e+00,  -6.21628523e+00,   6.28737402e+00,
         2.80976367e+00,   2.36737466e+00,   4.73107767e+00,
         2.61456966e+00,   3.51744080e+00,   7.51973486e+00,
         3.78149986e+00,   7.47525644e+00,  -6.36659050e+00,
        -1.05121737e+01,  -9.10657883e+00,   2.27350068e+00,
         6.72624016e+00,   2.88084459e+00,   3.01717377e+00,
         3.13672090e+00,   2.67095685e+00,  -9.80922127e+00,
         2.64621139e+00,   1.75292087e+00,   2.75703573e+00,
         7.06741142e+00,   3.84294510e+00,   3.56369376e+00,
         5.82183933e+00,   3.67812371e+00,   2.91328883e+00,
         7.29147625e+00,   3.54948926e+00,   5.01576233e+00,
        -4.38778073e-01,   4.35950089e+00,   3.85181022e+00,
        -7.93558645e+00,   3.12956405e+00,   8.67310524e+00,
         1.57258487e+00,   4.14394855e+00,   3.15832734e+00,
        -1.16732435e+01,  -9.49934387e+00,  -1.05555696e+01,
         2.47350860e+00,  -1.08378506e+01,   3.22470808e+00,
         2.07776390e-02,   1.94810867e+00,  -9.48894978e+00,
         2.15385199e+00,   2.86280060e+00,   5.91880560e+00,
        -1.16716738e+01,   3.50796676e+00,  -1.04793053e+01,
         5.85563993e+00,   2.78474021e+00,   1.20669165e+01,
         2.57065392e+00,   3.04297018e+00,   2.77586842e+00,
         1.98068404e+00,   8.15478325e+00,   2.44499373e+00,
        -7.69416380e+00,   6.07718563e+00,   6.94658184e+00,
         6.12182474e+00,   5.02222061e+00,  -6.13056946e+00,
         2.61275077e+00,   6.75010145e-01,  -1.18990774e+01,
         6.34448004e+00,  -1.04877586e+01,   3.03177810e+00,
         9.69449803e-02,   4.94658089e+00,   1.79380786e+00,
         5.66406059e+00,   6.23661661e+00,   2.60485530e+00,
         5.38836861e+00,  -9.76956749e+00,   2.70852590e+00,
        -1.12050276e+01,   3.06299210e+00,   6.59300280e+00,
         4.87766600e+00,   3.90205336e+00,  -7.60238171e-01,
         1.66826057e+00,   1.79832363e+00,   3.27109003e+00,
         8.37677193e+00,   4.28591824e+00,   3.08773375e+00,
         2.84103489e+00,   1.80365276e+00,   7.44261694e+00,
        -8.07192612e+00,   3.13988066e+00,  -1.06231203e+01,
         2.37853694e+00,   1.08058643e+00,   4.65958118e+00,
         5.91939497e+00], dtype=float32))
(u'Value_hidden_1_128_W:0', array([[ -1.25780666,  -1.17092371,  -1.25355208, ...,  -0.93317389,
         -7.35614252,  -7.5642972 ],
       [  2.90516615,   0.25105655,   3.00166345, ...,   2.63310456,
          7.22947454,   7.13979435],
       [ -0.65097791,  -0.87851095,  -0.85896653, ...,  -0.43622574,
         -7.73628712,  -7.86126614],
       ..., 
       [  9.7937355 ,  -1.15326357,  10.1008215 , ...,   9.36998749,
          6.19552994,   6.07367802],
       [  3.81829929,   0.20512164,   3.85689116, ...,   3.45021939,
          7.263165  ,   7.41106415],
       [ -1.58820951,   0.64163786,  -1.95892632, ...,  -1.10761905,
         10.46699333,  10.72078609]], dtype=float32))
(u'Value_hidden_1_128_b:0', array([ 0.84226799,  1.18302882,  1.1612705 , -0.07492125, -1.88058579,
        0.13309792,  0.20504384,  9.60604191,  8.82534885,  0.85720533,
        2.58401895,  8.69072723,  4.87706041,  9.80818748,  3.31918049,
        1.21543324, -0.78764468,  0.78067315,  3.94023085,  3.445472  ,
        0.89148057, -0.43656385, -0.50174987, -1.44359136,  0.9475199 ,
        1.78578603, -0.53415048,  0.5647741 ,  0.59887326, -1.507025  ,
        1.08308601, -0.48177078,  0.91136318,  1.58355021,  2.40578175,
        1.33149219, -0.27791637,  1.58377242, -0.07530674,  0.16699468,
       -0.16718031,  0.87290472, -1.20745122, -1.23954892,  9.84553623,
        0.52614009,  2.03862262,  0.3489002 ,  7.67496824,  4.45513487,
        0.89774954,  1.61138225,  1.9832418 ,  8.29558372, -0.57602602,
        1.53072011,  1.84700418,  0.27018818,  0.58351821,  1.55427396,
       -0.91076845,  1.46978939,  1.23094845,  1.03668511,  0.93598187,
        1.11400998,  0.90899134, -0.6142714 ,  8.59216881,  2.46433353,
        0.61918688,  0.86444366,  0.37071085,  3.43877673,  8.85885239,
        8.61473274,  1.95188177,  0.20756646,  7.3170495 ,  2.52595687,
       -1.87919283,  0.16388896,  0.68811661,  1.12959158, -0.75186425,
        0.15404917, -1.63201404,  3.74702382,  2.25460625,  0.1513105 ,
        5.43347454,  1.8512876 , -0.34228662,  0.51394838,  0.83213943,
        2.17330503,  0.98101383,  0.39970127, -0.46188936,  1.51158547,
        8.18206596,  0.25980452,  0.79585361,  0.96771598,  1.32820559,
        1.22119784,  2.83905244,  0.46498817,  7.50671005,  1.77912259,
        0.1328591 ,  0.01567538, -0.83439285,  3.48453498,  0.25987068,
       -1.0048418 , -1.15252471,  5.61252213,  2.04233718,  0.07792626,
        1.4678539 ,  0.20005444,  9.33331108,  0.64773172,  1.65842187,
        1.21791756,  9.45575428,  8.34985828], dtype=float32))
(u'Value_output_W:0', array([[ 14.08072376],
       [ -6.91102791],
       [ 13.70190334],
       [ -7.07524157],
       [ -8.7116518 ],
       [ -6.8711791 ],
       [ -7.95503187],
       [ 11.36927891],
       [ 11.23874569],
       [ -7.12090158],
       [ 12.71437359],
       [ 11.33105946],
       [ 13.76411057],
       [ 11.24247074],
       [ 12.41063881],
       [-10.22958565],
       [ 13.59835625],
       [ 15.75961113],
       [ 12.93144321],
       [ 12.26033688],
       [ -7.01160336],
       [ -7.7703023 ],
       [ -8.33419991],
       [ -8.38042927],
       [ 13.30167389],
       [ -7.3757596 ],
       [-10.33202744],
       [ -8.19341278],
       [-18.28740692],
       [ -8.07747459],
       [ 14.22387886],
       [ -8.20703316],
       [-18.28321266],
       [ 12.95490742],
       [ -6.85462809],
       [ 11.81343555],
       [ -8.01800823],
       [ 13.40439224],
       [ -7.87752485],
       [ -7.19196653],
       [ -8.23436642],
       [ -8.16296196],
       [ -7.91334057],
       [ -7.80759048],
       [ 11.23246098],
       [ -8.19746304],
       [ -7.22853374],
       [ -6.88819075],
       [ 10.94302273],
       [ 12.93998909],
       [ 15.80602932],
       [ -9.32952309],
       [-18.21645546],
       [ 11.10361958],
       [ -8.23092651],
       [ 12.06726837],
       [ -7.32524347],
       [ -7.19428635],
       [ -8.10025311],
       [-18.34348679],
       [ -6.86743212],
       [ -6.88756371],
       [ -6.92413425],
       [ -7.26828575],
       [-14.03055668],
       [ -8.09986591],
       [ 12.74128151],
       [ 14.0692625 ],
       [ 10.95670509],
       [-13.74651051],
       [ -8.36290169],
       [ -9.30518055],
       [ 14.36920261],
       [ 13.94973946],
       [ 11.23568726],
       [ 11.2925024 ],
       [ -9.28734684],
       [ -8.22806454],
       [ 11.464818  ],
       [ 13.47125244],
       [ -7.88527441],
       [ -7.41429472],
       [ -8.35400105],
       [ -7.23832035],
       [ -8.21488857],
       [ -9.54511166],
       [ -8.39470482],
       [ 13.43649292],
       [ -8.38013458],
       [ 12.20474815],
       [ 18.32191658],
       [ 11.98346329],
       [ -7.25662613],
       [-12.78783703],
       [ -6.79527378],
       [ 12.38147736],
       [-16.85344124],
       [ -7.07673597],
       [ -8.22855282],
       [ -7.06379366],
       [ 11.25438118],
       [ -8.00475121],
       [ -8.2504406 ],
       [ -8.14599037],
       [ -7.12892962],
       [ 12.55915165],
       [ 12.63164425],
       [ 15.62143612],
       [ 11.18899536],
       [ 11.84920788],
       [ -7.07597828],
       [ -6.94098377],
       [ 14.15165043],
       [ 12.44956684],
       [-17.26247597],
       [ -8.13848877],
       [ -8.34531498],
       [ 13.27914715],
       [ 13.06786728],
       [ -6.9589138 ],
       [ 11.8812542 ],
       [ -7.76262903],
       [ 11.25865936],
       [ -7.25028324],
       [ 14.01673603],
       [ 15.85590935],
       [ 11.06599045],
       [ 11.36882591]], dtype=float32))
(u'Value_output_b:0', array([ 7.8088088], dtype=float32))

In [ ]: