Documentation: https://gym.openai.com/docs/
git clone https://github.com/openai/gym cd gym pip install -e . # minimal install
In [1]:
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
Using TensorFlow backend.
In [5]:
env = gym.make('CartPole-v0')
env.seed(123)
nb_actions = env.action_space.n
[2017-10-30 16:48:52,445] Making new env: CartPole-v0
In [4]:
env.reset()
for _ in range(1000):
env.render()
env.step(env.action_space.sample()) # take a random action
[2017-10-30 16:48:36,076] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.
In [6]:
env.observation_space.shape
Out[6]:
(4,)
In [7]:
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_1 (Flatten) (None, 4) 0
_________________________________________________________________
dense_1 (Dense) (None, 16) 80
_________________________________________________________________
activation_1 (Activation) (None, 16) 0
_________________________________________________________________
dense_2 (Dense) (None, 16) 272
_________________________________________________________________
activation_2 (Activation) (None, 16) 0
_________________________________________________________________
dense_3 (Dense) (None, 16) 272
_________________________________________________________________
activation_3 (Activation) (None, 16) 0
_________________________________________________________________
dense_4 (Dense) (None, 2) 34
_________________________________________________________________
activation_4 (Activation) (None, 2) 0
=================================================================
Total params: 658
Trainable params: 658
Non-trainable params: 0
_________________________________________________________________
None
In [10]:
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
nb_actions=nb_actions,
memory=memory,
nb_steps_warmup=10,
target_model_update=1e-2,
policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
In [11]:
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
Training for 50000 steps ...
/usr/local/lib/python2.7/site-packages/rl/memory.py:29: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!
warnings.warn('Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!')
12/50000: episode: 1, duration: 1.224s, episode steps: 12, steps per second: 10, episode reward: 12.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.333 [0.000, 1.000], mean observation: 0.098 [-0.995, 1.587], loss: 0.730131, mean_absolute_error: 0.697327, mean_q: 0.141359
36/50000: episode: 2, duration: 0.404s, episode steps: 24, steps per second: 59, episode reward: 24.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.417 [0.000, 1.000], mean observation: 0.123 [-0.755, 1.734], loss: 0.581815, mean_absolute_error: 0.627062, mean_q: 0.237459
46/50000: episode: 3, duration: 0.186s, episode steps: 10, steps per second: 54, episode reward: 10.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.000 [0.000, 0.000], mean observation: 0.167 [-1.905, 3.089], loss: 0.380364, mean_absolute_error: 0.556484, mean_q: 0.419568
90/50000: episode: 4, duration: 0.734s, episode steps: 44, steps per second: 60, episode reward: 44.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.568 [0.000, 1.000], mean observation: 0.013 [-1.843, 1.333], loss: 0.131956, mean_absolute_error: 0.627912, mean_q: 1.045320
103/50000: episode: 5, duration: 0.215s, episode steps: 13, steps per second: 60, episode reward: 13.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.154 [0.000, 1.000], mean observation: 0.114 [-1.721, 2.713], loss: 0.049344, mean_absolute_error: 0.770834, mean_q: 1.380475
126/50000: episode: 6, duration: 0.384s, episode steps: 23, steps per second: 60, episode reward: 23.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.478 [0.000, 1.000], mean observation: 0.061 [-0.968, 1.467], loss: 0.038807, mean_absolute_error: 0.802404, mean_q: 1.461194
163/50000: episode: 7, duration: 0.617s, episode steps: 37, steps per second: 60, episode reward: 37.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.622 [0.000, 1.000], mean observation: 0.061 [-2.034, 1.770], loss: 0.028424, mean_absolute_error: 0.880773, mean_q: 1.700444
177/50000: episode: 8, duration: 0.229s, episode steps: 14, steps per second: 61, episode reward: 14.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.643 [0.000, 1.000], mean observation: -0.100 [-1.558, 0.792], loss: 0.047996, mean_absolute_error: 1.004444, mean_q: 1.976723
190/50000: episode: 9, duration: 0.219s, episode steps: 13, steps per second: 59, episode reward: 13.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.692 [0.000, 1.000], mean observation: -0.083 [-1.703, 1.031], loss: 0.041687, mean_absolute_error: 1.090694, mean_q: 2.210563
202/50000: episode: 10, duration: 0.201s, episode steps: 12, steps per second: 60, episode reward: 12.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.417 [0.000, 1.000], mean observation: 0.098 [-0.999, 1.663], loss: 0.059780, mean_absolute_error: 1.126381, mean_q: 2.206965
227/50000: episode: 11, duration: 0.416s, episode steps: 25, steps per second: 60, episode reward: 25.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.640 [0.000, 1.000], mean observation: -0.034 [-2.275, 1.529], loss: 0.062852, mean_absolute_error: 1.207087, mean_q: 2.353915
247/50000: episode: 12, duration: 0.332s, episode steps: 20, steps per second: 60, episode reward: 20.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.450 [0.000, 1.000], mean observation: 0.103 [-0.740, 1.204], loss: 0.078029, mean_absolute_error: 1.303885, mean_q: 2.546268
270/50000: episode: 13, duration: 0.384s, episode steps: 23, steps per second: 60, episode reward: 23.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.609 [0.000, 1.000], mean observation: -0.070 [-2.022, 1.039], loss: 0.050891, mean_absolute_error: 1.372919, mean_q: 2.732219
280/50000: episode: 14, duration: 0.166s, episode steps: 10, steps per second: 60, episode reward: 10.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.700 [0.000, 1.000], mean observation: -0.104 [-1.629, 0.999], loss: 0.115953, mean_absolute_error: 1.451221, mean_q: 2.841381
291/50000: episode: 15, duration: 0.183s, episode steps: 11, steps per second: 60, episode reward: 11.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.818 [0.000, 1.000], mean observation: -0.136 [-2.455, 1.559], loss: 0.135146, mean_absolute_error: 1.510224, mean_q: 2.937984
308/50000: episode: 16, duration: 0.283s, episode steps: 17, steps per second: 60, episode reward: 17.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.471 [0.000, 1.000], mean observation: 0.101 [-0.581, 0.993], loss: 0.118566, mean_absolute_error: 1.591753, mean_q: 3.116969
319/50000: episode: 17, duration: 0.183s, episode steps: 11, steps per second: 60, episode reward: 11.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.182 [0.000, 1.000], mean observation: 0.105 [-1.354, 2.180], loss: 0.141387, mean_absolute_error: 1.628607, mean_q: 3.126880
333/50000: episode: 18, duration: 0.232s, episode steps: 14, steps per second: 60, episode reward: 14.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.357 [0.000, 1.000], mean observation: 0.091 [-0.990, 1.549], loss: 0.109661, mean_absolute_error: 1.686302, mean_q: 3.338614
343/50000: episode: 19, duration: 0.167s, episode steps: 10, steps per second: 60, episode reward: 10.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.200 [0.000, 1.000], mean observation: 0.148 [-1.346, 2.190], loss: 0.160945, mean_absolute_error: 1.734304, mean_q: 3.332022
362/50000: episode: 20, duration: 0.316s, episode steps: 19, steps per second: 60, episode reward: 19.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.526 [0.000, 1.000], mean observation: -0.085 [-1.177, 0.786], loss: 0.113006, mean_absolute_error: 1.780940, mean_q: 3.505341
371/50000: episode: 21, duration: 0.149s, episode steps: 9, steps per second: 60, episode reward: 9.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.222 [0.000, 1.000], mean observation: 0.155 [-0.964, 1.743], loss: 0.097902, mean_absolute_error: 1.815986, mean_q: 3.611781
384/50000: episode: 22, duration: 0.217s, episode steps: 13, steps per second: 60, episode reward: 13.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.692 [0.000, 1.000], mean observation: -0.097 [-1.694, 0.974], loss: 0.180323, mean_absolute_error: 1.914621, mean_q: 3.676909
413/50000: episode: 23, duration: 0.483s, episode steps: 29, steps per second: 60, episode reward: 29.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.517 [0.000, 1.000], mean observation: -0.016 [-1.158, 0.817], loss: 0.163045, mean_absolute_error: 2.003331, mean_q: 3.925609
444/50000: episode: 24, duration: 0.516s, episode steps: 31, steps per second: 60, episode reward: 31.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.645 [0.000, 1.000], mean observation: 0.039 [-2.286, 1.753], loss: 0.178069, mean_absolute_error: 2.141761, mean_q: 4.136299
554/50000: episode: 25, duration: 1.834s, episode steps: 110, steps per second: 60, episode reward: 110.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.527 [0.000, 1.000], mean observation: -0.137 [-3.031, 1.631], loss: 0.171184, mean_absolute_error: 2.444498, mean_q: 4.794304
568/50000: episode: 26, duration: 0.229s, episode steps: 14, steps per second: 61, episode reward: 14.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.095 [-0.581, 1.168], loss: 0.200073, mean_absolute_error: 2.757100, mean_q: 5.368252
610/50000: episode: 27, duration: 0.704s, episode steps: 42, steps per second: 60, episode reward: 42.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.476 [0.000, 1.000], mean observation: -0.195 [-1.199, 0.580], loss: 0.240455, mean_absolute_error: 2.841887, mean_q: 5.515810
631/50000: episode: 28, duration: 0.350s, episode steps: 21, steps per second: 60, episode reward: 21.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.524 [0.000, 1.000], mean observation: -0.068 [-1.364, 0.925], loss: 0.242162, mean_absolute_error: 2.984877, mean_q: 5.784202
650/50000: episode: 29, duration: 0.317s, episode steps: 19, steps per second: 60, episode reward: 19.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.579 [0.000, 1.000], mean observation: -0.062 [-1.330, 0.650], loss: 0.372714, mean_absolute_error: 3.055348, mean_q: 5.768178
685/50000: episode: 30, duration: 0.583s, episode steps: 35, steps per second: 60, episode reward: 35.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.543 [0.000, 1.000], mean observation: -0.073 [-1.914, 1.040], loss: 0.285742, mean_absolute_error: 3.131288, mean_q: 6.006039
727/50000: episode: 31, duration: 0.700s, episode steps: 42, steps per second: 60, episode reward: 42.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.595 [0.000, 1.000], mean observation: 0.064 [-1.889, 1.689], loss: 0.272975, mean_absolute_error: 3.306600, mean_q: 6.389527
807/50000: episode: 32, duration: 1.332s, episode steps: 80, steps per second: 60, episode reward: 80.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.525 [0.000, 1.000], mean observation: 0.172 [-0.949, 1.455], loss: 0.292107, mean_absolute_error: 3.562855, mean_q: 6.933080
833/50000: episode: 33, duration: 0.434s, episode steps: 26, steps per second: 60, episode reward: 26.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.538 [0.000, 1.000], mean observation: -0.035 [-1.461, 0.998], loss: 0.268089, mean_absolute_error: 3.770189, mean_q: 7.350021
890/50000: episode: 34, duration: 0.951s, episode steps: 57, steps per second: 60, episode reward: 57.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.526 [0.000, 1.000], mean observation: 0.075 [-1.500, 1.573], loss: 0.309585, mean_absolute_error: 3.955331, mean_q: 7.746324
930/50000: episode: 35, duration: 0.668s, episode steps: 40, steps per second: 60, episode reward: 40.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.078 [-1.312, 1.182], loss: 0.281607, mean_absolute_error: 4.159922, mean_q: 8.234924
1042/50000: episode: 36, duration: 1.865s, episode steps: 112, steps per second: 60, episode reward: 112.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.527 [0.000, 1.000], mean observation: -0.106 [-2.214, 1.715], loss: 0.344884, mean_absolute_error: 4.478506, mean_q: 8.907092
1054/50000: episode: 37, duration: 0.201s, episode steps: 12, steps per second: 60, episode reward: 12.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.101 [-1.333, 0.803], loss: 0.441257, mean_absolute_error: 4.778954, mean_q: 9.557951
1176/50000: episode: 38, duration: 2.035s, episode steps: 122, steps per second: 60, episode reward: 122.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.492 [0.000, 1.000], mean observation: -0.135 [-1.484, 1.116], loss: 0.455703, mean_absolute_error: 5.032112, mean_q: 10.088054
1262/50000: episode: 39, duration: 1.431s, episode steps: 86, steps per second: 60, episode reward: 86.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.512 [0.000, 1.000], mean observation: -0.203 [-1.745, 1.345], loss: 0.547364, mean_absolute_error: 5.525266, mean_q: 11.085084
1282/50000: episode: 40, duration: 0.335s, episode steps: 20, steps per second: 60, episode reward: 20.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.070 [-0.606, 1.082], loss: 0.400191, mean_absolute_error: 5.686526, mean_q: 11.452986
1388/50000: episode: 41, duration: 1.768s, episode steps: 106, steps per second: 60, episode reward: 106.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.509 [0.000, 1.000], mean observation: 0.131 [-1.295, 1.233], loss: 0.592236, mean_absolute_error: 6.023518, mean_q: 12.141464
1522/50000: episode: 42, duration: 2.235s, episode steps: 134, steps per second: 60, episode reward: 134.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.154 [-1.357, 1.667], loss: 0.573660, mean_absolute_error: 6.613861, mean_q: 13.364227
1597/50000: episode: 43, duration: 1.250s, episode steps: 75, steps per second: 60, episode reward: 75.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.507 [0.000, 1.000], mean observation: 0.113 [-0.991, 1.141], loss: 0.843798, mean_absolute_error: 7.112903, mean_q: 14.347638
1736/50000: episode: 44, duration: 2.318s, episode steps: 139, steps per second: 60, episode reward: 139.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.489 [0.000, 1.000], mean observation: -0.258 [-2.027, 1.460], loss: 0.769187, mean_absolute_error: 7.622921, mean_q: 15.431851
1896/50000: episode: 45, duration: 2.668s, episode steps: 160, steps per second: 60, episode reward: 160.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.487 [0.000, 1.000], mean observation: -0.123 [-1.443, 1.158], loss: 0.852918, mean_absolute_error: 8.284316, mean_q: 16.854824
2049/50000: episode: 46, duration: 2.551s, episode steps: 153, steps per second: 60, episode reward: 153.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.198 [-1.242, 1.548], loss: 0.986513, mean_absolute_error: 9.049216, mean_q: 18.357847
2249/50000: episode: 47, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.475 [0.000, 1.000], mean observation: -0.232 [-1.791, 0.846], loss: 0.999475, mean_absolute_error: 9.877664, mean_q: 20.089165
2449/50000: episode: 48, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.159 [-1.218, 0.921], loss: 1.456109, mean_absolute_error: 10.855062, mean_q: 22.039587
2649/50000: episode: 49, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: -0.014 [-1.126, 0.954], loss: 1.318281, mean_absolute_error: 11.809815, mean_q: 23.985289
2800/50000: episode: 50, duration: 2.518s, episode steps: 151, steps per second: 60, episode reward: 151.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.470 [0.000, 1.000], mean observation: -0.390 [-2.118, 1.216], loss: 1.558319, mean_absolute_error: 12.529769, mean_q: 25.496513
2993/50000: episode: 51, duration: 3.218s, episode steps: 193, steps per second: 60, episode reward: 193.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.472 [0.000, 1.000], mean observation: -0.354 [-2.406, 1.164], loss: 1.602982, mean_absolute_error: 13.319470, mean_q: 27.046223
3192/50000: episode: 52, duration: 3.318s, episode steps: 199, steps per second: 60, episode reward: 199.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.477 [0.000, 1.000], mean observation: -0.341 [-2.436, 0.965], loss: 1.873602, mean_absolute_error: 14.110317, mean_q: 28.648241
3392/50000: episode: 53, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.314 [-2.247, 1.054], loss: 1.604448, mean_absolute_error: 14.919781, mean_q: 30.361876
3591/50000: episode: 54, duration: 3.319s, episode steps: 199, steps per second: 60, episode reward: 199.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.477 [0.000, 1.000], mean observation: -0.351 [-2.408, 0.793], loss: 1.980125, mean_absolute_error: 15.735256, mean_q: 32.023682
3791/50000: episode: 55, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: -0.003 [-1.137, 0.974], loss: 2.540923, mean_absolute_error: 16.590670, mean_q: 33.709198
3973/50000: episode: 56, duration: 3.036s, episode steps: 182, steps per second: 60, episode reward: 182.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.473 [0.000, 1.000], mean observation: -0.387 [-2.430, 1.003], loss: 2.051823, mean_absolute_error: 17.389811, mean_q: 35.392284
4173/50000: episode: 57, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.238 [-1.770, 1.190], loss: 2.837559, mean_absolute_error: 18.139902, mean_q: 36.819218
4371/50000: episode: 58, duration: 3.302s, episode steps: 198, steps per second: 60, episode reward: 198.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.339 [-2.416, 1.056], loss: 3.072577, mean_absolute_error: 18.923796, mean_q: 38.395458
4571/50000: episode: 59, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.229 [-1.714, 0.974], loss: 2.903583, mean_absolute_error: 19.534241, mean_q: 39.677567
4771/50000: episode: 60, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.170 [-1.524, 1.203], loss: 2.664678, mean_absolute_error: 20.369638, mean_q: 41.395058
4971/50000: episode: 61, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.178 [-1.503, 0.958], loss: 2.979738, mean_absolute_error: 21.034321, mean_q: 42.789951
5147/50000: episode: 62, duration: 2.935s, episode steps: 176, steps per second: 60, episode reward: 176.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.477 [0.000, 1.000], mean observation: -0.381 [-2.408, 0.875], loss: 2.436398, mean_absolute_error: 21.884859, mean_q: 44.474510
5347/50000: episode: 63, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.475 [0.000, 1.000], mean observation: -0.261 [-1.854, 0.901], loss: 2.701290, mean_absolute_error: 22.461739, mean_q: 45.713348
5547/50000: episode: 64, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.321 [-2.304, 0.873], loss: 3.102812, mean_absolute_error: 23.208960, mean_q: 47.037090
5734/50000: episode: 65, duration: 3.119s, episode steps: 187, steps per second: 60, episode reward: 187.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.481 [0.000, 1.000], mean observation: -0.361 [-2.422, 0.938], loss: 3.649678, mean_absolute_error: 23.816454, mean_q: 48.277233
5934/50000: episode: 66, duration: 3.331s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.096 [-1.106, 1.022], loss: 4.870682, mean_absolute_error: 24.306807, mean_q: 49.202721
6134/50000: episode: 67, duration: 3.339s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.298 [-2.113, 1.011], loss: 4.202134, mean_absolute_error: 25.037476, mean_q: 50.777851
6334/50000: episode: 68, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.143 [-1.288, 0.830], loss: 4.148354, mean_absolute_error: 25.533611, mean_q: 51.830811
6520/50000: episode: 69, duration: 3.102s, episode steps: 186, steps per second: 60, episode reward: 186.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.478 [0.000, 1.000], mean observation: -0.367 [-2.404, 0.760], loss: 4.093632, mean_absolute_error: 26.162220, mean_q: 53.094643
6720/50000: episode: 70, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.326 [-2.381, 1.182], loss: 4.553807, mean_absolute_error: 26.612968, mean_q: 53.887035
6920/50000: episode: 71, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.201 [-1.531, 1.142], loss: 4.145655, mean_absolute_error: 27.202936, mean_q: 55.103466
7120/50000: episode: 72, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.185 [-1.440, 0.942], loss: 3.731707, mean_absolute_error: 27.793386, mean_q: 56.381481
7314/50000: episode: 73, duration: 3.236s, episode steps: 194, steps per second: 60, episode reward: 194.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.479 [0.000, 1.000], mean observation: -0.355 [-2.401, 0.925], loss: 3.349974, mean_absolute_error: 28.317257, mean_q: 57.499626
7514/50000: episode: 74, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.244 [-1.864, 0.896], loss: 3.616204, mean_absolute_error: 28.756714, mean_q: 58.314098
7714/50000: episode: 75, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.264 [-1.958, 0.972], loss: 2.843238, mean_absolute_error: 29.379019, mean_q: 59.669456
7914/50000: episode: 76, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.268 [-1.965, 1.120], loss: 5.869200, mean_absolute_error: 30.008516, mean_q: 60.571121
8114/50000: episode: 77, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.295 [-2.205, 0.879], loss: 3.701240, mean_absolute_error: 30.083351, mean_q: 60.950108
8314/50000: episode: 78, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.164 [-1.440, 1.249], loss: 4.320634, mean_absolute_error: 30.659094, mean_q: 62.019585
8512/50000: episode: 79, duration: 3.303s, episode steps: 198, steps per second: 60, episode reward: 198.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.470 [0.000, 1.000], mean observation: -0.337 [-2.426, 1.183], loss: 5.627623, mean_absolute_error: 31.061623, mean_q: 62.806965
8712/50000: episode: 80, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.310 [-2.230, 1.066], loss: 4.753113, mean_absolute_error: 31.242180, mean_q: 63.190205
8907/50000: episode: 81, duration: 3.251s, episode steps: 195, steps per second: 60, episode reward: 195.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.482 [0.000, 1.000], mean observation: -0.341 [-2.410, 0.941], loss: 3.358240, mean_absolute_error: 31.807392, mean_q: 64.421082
9087/50000: episode: 82, duration: 3.001s, episode steps: 180, steps per second: 60, episode reward: 180.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.478 [0.000, 1.000], mean observation: -0.377 [-2.423, 0.736], loss: 4.412118, mean_absolute_error: 31.724094, mean_q: 64.236374
9287/50000: episode: 83, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.312 [-2.272, 1.019], loss: 4.552834, mean_absolute_error: 32.310112, mean_q: 65.365433
9487/50000: episode: 84, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.216 [-1.623, 0.893], loss: 4.854617, mean_absolute_error: 32.577362, mean_q: 65.837288
9687/50000: episode: 85, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.276 [-2.040, 1.103], loss: 3.461938, mean_absolute_error: 32.954369, mean_q: 66.633324
9887/50000: episode: 86, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.127 [-1.172, 1.166], loss: 6.004831, mean_absolute_error: 33.273647, mean_q: 67.162323
10075/50000: episode: 87, duration: 3.135s, episode steps: 188, steps per second: 60, episode reward: 188.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.473 [0.000, 1.000], mean observation: -0.354 [-2.412, 0.855], loss: 5.738129, mean_absolute_error: 33.524899, mean_q: 67.719902
10275/50000: episode: 88, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.244 [-1.775, 1.160], loss: 2.624167, mean_absolute_error: 33.703171, mean_q: 68.214485
10475/50000: episode: 89, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.475 [0.000, 1.000], mean observation: -0.226 [-1.873, 1.340], loss: 3.631939, mean_absolute_error: 34.271378, mean_q: 69.296211
10674/50000: episode: 90, duration: 3.319s, episode steps: 199, steps per second: 60, episode reward: 199.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.477 [0.000, 1.000], mean observation: -0.331 [-2.420, 1.011], loss: 3.474513, mean_absolute_error: 34.454082, mean_q: 69.673302
10874/50000: episode: 91, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.269 [-1.933, 1.000], loss: 4.352009, mean_absolute_error: 34.625858, mean_q: 69.923943
11069/50000: episode: 92, duration: 3.251s, episode steps: 195, steps per second: 60, episode reward: 195.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.477 [0.000, 1.000], mean observation: -0.337 [-2.404, 1.039], loss: 3.056769, mean_absolute_error: 34.929508, mean_q: 70.556190
11263/50000: episode: 93, duration: 3.236s, episode steps: 194, steps per second: 60, episode reward: 194.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.474 [0.000, 1.000], mean observation: -0.342 [-2.425, 1.044], loss: 4.306901, mean_absolute_error: 35.108334, mean_q: 70.916183
11463/50000: episode: 94, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.163 [-1.498, 1.161], loss: 4.386919, mean_absolute_error: 35.767666, mean_q: 72.153214
11663/50000: episode: 95, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.169 [-0.865, 1.169], loss: 4.370080, mean_absolute_error: 35.383984, mean_q: 71.370903
11863/50000: episode: 96, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.065 [-1.271, 1.286], loss: 4.497574, mean_absolute_error: 35.985268, mean_q: 72.575378
12063/50000: episode: 97, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.282 [-2.043, 1.210], loss: 6.403303, mean_absolute_error: 36.273945, mean_q: 73.116943
12263/50000: episode: 98, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.148 [-1.460, 1.514], loss: 4.413114, mean_absolute_error: 36.738396, mean_q: 74.037003
12463/50000: episode: 99, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.022 [-1.064, 1.105], loss: 5.408292, mean_absolute_error: 36.747997, mean_q: 74.000931
12649/50000: episode: 100, duration: 3.104s, episode steps: 186, steps per second: 60, episode reward: 186.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.457 [0.000, 1.000], mean observation: -0.345 [-3.046, 2.055], loss: 3.838928, mean_absolute_error: 37.139462, mean_q: 74.960800
12849/50000: episode: 101, duration: 3.333s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.217 [-1.844, 1.018], loss: 5.380589, mean_absolute_error: 36.875435, mean_q: 74.342949
13049/50000: episode: 102, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.132 [-1.514, 1.352], loss: 2.637272, mean_absolute_error: 37.260757, mean_q: 75.186722
13249/50000: episode: 103, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.114 [-1.158, 0.969], loss: 3.608623, mean_absolute_error: 37.430157, mean_q: 75.487961
13449/50000: episode: 104, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.162 [-0.995, 1.162], loss: 5.213064, mean_absolute_error: 38.121208, mean_q: 76.815948
13649/50000: episode: 105, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.198 [-1.109, 1.431], loss: 4.655735, mean_absolute_error: 38.179443, mean_q: 76.902809
13849/50000: episode: 106, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.317 [-2.203, 1.277], loss: 5.263301, mean_absolute_error: 38.213272, mean_q: 76.851311
14049/50000: episode: 107, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.150 [-1.030, 1.086], loss: 3.835557, mean_absolute_error: 38.667984, mean_q: 77.856628
14249/50000: episode: 108, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.275 [-0.935, 1.955], loss: 8.752577, mean_absolute_error: 38.999100, mean_q: 78.316658
14449/50000: episode: 109, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.520 [0.000, 1.000], mean observation: 0.285 [-0.924, 1.959], loss: 5.127304, mean_absolute_error: 38.671875, mean_q: 77.872269
14649/50000: episode: 110, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.151 [-1.299, 0.961], loss: 6.609150, mean_absolute_error: 38.845520, mean_q: 78.157936
14849/50000: episode: 111, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.005 [-1.103, 1.152], loss: 5.106807, mean_absolute_error: 39.349018, mean_q: 79.184349
15049/50000: episode: 112, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.268 [-2.217, 1.184], loss: 3.954987, mean_absolute_error: 39.598450, mean_q: 79.854294
15243/50000: episode: 113, duration: 3.235s, episode steps: 194, steps per second: 60, episode reward: 194.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.521 [0.000, 1.000], mean observation: 0.378 [-0.961, 2.426], loss: 4.145005, mean_absolute_error: 39.234550, mean_q: 79.181839
15443/50000: episode: 114, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.124 [-1.031, 1.170], loss: 5.263028, mean_absolute_error: 39.389229, mean_q: 79.353447
15631/50000: episode: 115, duration: 3.134s, episode steps: 188, steps per second: 60, episode reward: 188.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.521 [0.000, 1.000], mean observation: 0.387 [-1.010, 2.400], loss: 8.379552, mean_absolute_error: 39.607052, mean_q: 79.669830
15831/50000: episode: 116, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.189 [-0.832, 1.366], loss: 3.789888, mean_absolute_error: 39.781498, mean_q: 80.170692
16031/50000: episode: 117, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.164 [-0.981, 1.196], loss: 6.850698, mean_absolute_error: 40.000874, mean_q: 80.536003
16231/50000: episode: 118, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.475 [0.000, 1.000], mean observation: -0.252 [-1.876, 1.068], loss: 4.011051, mean_absolute_error: 40.063709, mean_q: 80.683807
16431/50000: episode: 119, duration: 3.332s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.196 [-1.862, 1.482], loss: 4.615864, mean_absolute_error: 40.661175, mean_q: 81.830833
16619/50000: episode: 120, duration: 3.138s, episode steps: 188, steps per second: 60, episode reward: 188.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.463 [0.000, 1.000], mean observation: -0.338 [-2.583, 1.768], loss: 2.920488, mean_absolute_error: 39.994999, mean_q: 80.514793
16819/50000: episode: 121, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.475 [0.000, 1.000], mean observation: -0.317 [-2.275, 1.235], loss: 4.636342, mean_absolute_error: 40.095291, mean_q: 80.743896
17009/50000: episode: 122, duration: 3.169s, episode steps: 190, steps per second: 60, episode reward: 190.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.521 [0.000, 1.000], mean observation: 0.394 [-1.054, 2.410], loss: 5.557212, mean_absolute_error: 40.430817, mean_q: 81.396667
17209/50000: episode: 123, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.219 [-1.536, 1.264], loss: 7.554633, mean_absolute_error: 40.200455, mean_q: 80.877014
17409/50000: episode: 124, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.300 [-0.864, 1.879], loss: 4.708385, mean_absolute_error: 40.157177, mean_q: 80.760124
17609/50000: episode: 125, duration: 3.332s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.105 [-1.190, 1.283], loss: 4.619344, mean_absolute_error: 40.336781, mean_q: 81.098152
17809/50000: episode: 126, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.152 [-1.652, 1.061], loss: 6.770731, mean_absolute_error: 40.184673, mean_q: 80.757912
18009/50000: episode: 127, duration: 3.338s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.019 [-1.295, 1.199], loss: 5.428377, mean_absolute_error: 40.465275, mean_q: 81.295609
18209/50000: episode: 128, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.141 [-1.167, 1.191], loss: 6.389644, mean_absolute_error: 40.620853, mean_q: 81.639580
18409/50000: episode: 129, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.153 [-1.000, 1.300], loss: 9.157187, mean_absolute_error: 40.742519, mean_q: 81.794022
18609/50000: episode: 130, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.158 [-1.514, 1.230], loss: 7.528184, mean_absolute_error: 41.088623, mean_q: 82.429420
18809/50000: episode: 131, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.024 [-1.100, 1.122], loss: 5.259576, mean_absolute_error: 40.731041, mean_q: 81.850197
19009/50000: episode: 132, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.319 [-1.097, 1.895], loss: 9.745716, mean_absolute_error: 40.971035, mean_q: 82.311707
19209/50000: episode: 133, duration: 3.369s, episode steps: 200, steps per second: 59, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.282 [-1.376, 1.637], loss: 5.329291, mean_absolute_error: 41.323174, mean_q: 83.151192
19409/50000: episode: 134, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.205 [-1.304, 1.353], loss: 4.007547, mean_absolute_error: 41.496212, mean_q: 83.561584
19609/50000: episode: 135, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.229 [-1.647, 1.695], loss: 5.861049, mean_absolute_error: 41.768875, mean_q: 84.036064
19809/50000: episode: 136, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.081 [-1.756, 1.720], loss: 10.478913, mean_absolute_error: 41.879375, mean_q: 84.037399
20009/50000: episode: 137, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.259 [-1.298, 1.516], loss: 4.719917, mean_absolute_error: 41.901440, mean_q: 84.503380
20209/50000: episode: 138, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.283 [-1.173, 1.729], loss: 9.245949, mean_absolute_error: 42.925880, mean_q: 86.108597
20409/50000: episode: 139, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.265 [-1.437, 1.472], loss: 6.236575, mean_absolute_error: 42.776779, mean_q: 86.076813
20609/50000: episode: 140, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.191 [-1.407, 1.257], loss: 7.418372, mean_absolute_error: 42.857967, mean_q: 86.075531
20809/50000: episode: 141, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.214 [-1.378, 1.497], loss: 5.418757, mean_absolute_error: 43.485096, mean_q: 87.350250
21009/50000: episode: 142, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.204 [-1.141, 1.311], loss: 10.706781, mean_absolute_error: 43.496582, mean_q: 87.296913
21209/50000: episode: 143, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.241 [-1.050, 1.423], loss: 5.399929, mean_absolute_error: 43.709942, mean_q: 87.907753
21409/50000: episode: 144, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.242 [-1.245, 1.308], loss: 5.399437, mean_absolute_error: 43.914600, mean_q: 88.307205
21609/50000: episode: 145, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.283 [-1.163, 1.464], loss: 8.203210, mean_absolute_error: 44.041092, mean_q: 88.402214
21809/50000: episode: 146, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.014 [-0.952, 1.044], loss: 10.219863, mean_absolute_error: 44.120605, mean_q: 88.466751
22009/50000: episode: 147, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.091 [-1.557, 1.479], loss: 5.603449, mean_absolute_error: 43.979404, mean_q: 88.428947
22209/50000: episode: 148, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.187 [-1.283, 1.334], loss: 8.438824, mean_absolute_error: 44.356178, mean_q: 89.044983
22409/50000: episode: 149, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: -0.138 [-1.323, 1.330], loss: 8.039782, mean_absolute_error: 44.243996, mean_q: 88.687469
22609/50000: episode: 150, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.237 [-1.173, 1.403], loss: 11.193684, mean_absolute_error: 44.176754, mean_q: 88.501640
22809/50000: episode: 151, duration: 3.333s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.155 [-1.217, 1.147], loss: 9.846059, mean_absolute_error: 43.817329, mean_q: 87.798523
23009/50000: episode: 152, duration: 3.337s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.254 [-0.981, 1.523], loss: 8.470125, mean_absolute_error: 43.724064, mean_q: 87.697304
23209/50000: episode: 153, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.147 [-1.041, 1.146], loss: 7.200252, mean_absolute_error: 43.898861, mean_q: 88.064316
23409/50000: episode: 154, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.239 [-0.841, 1.217], loss: 6.163500, mean_absolute_error: 43.745819, mean_q: 87.758888
23609/50000: episode: 155, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.224 [-0.975, 1.150], loss: 7.174826, mean_absolute_error: 43.742043, mean_q: 87.701233
23809/50000: episode: 156, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.190 [-1.123, 1.152], loss: 6.579295, mean_absolute_error: 43.597763, mean_q: 87.627686
24009/50000: episode: 157, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.225 [-0.810, 1.118], loss: 7.638906, mean_absolute_error: 43.443634, mean_q: 87.116348
24209/50000: episode: 158, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.224 [-0.941, 1.146], loss: 7.019081, mean_absolute_error: 43.655724, mean_q: 87.553375
24409/50000: episode: 159, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.184 [-0.864, 0.978], loss: 7.065122, mean_absolute_error: 43.537315, mean_q: 87.399857
24609/50000: episode: 160, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.053 [-1.414, 1.467], loss: 7.068305, mean_absolute_error: 43.326904, mean_q: 87.023460
24809/50000: episode: 161, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.244 [-1.025, 1.130], loss: 6.722576, mean_absolute_error: 43.441849, mean_q: 87.148369
25009/50000: episode: 162, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.242 [-1.293, 1.257], loss: 8.191231, mean_absolute_error: 43.441269, mean_q: 87.211639
25209/50000: episode: 163, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.164 [-1.217, 1.264], loss: 7.406447, mean_absolute_error: 43.770626, mean_q: 87.753761
25409/50000: episode: 164, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.349 [-1.368, 2.124], loss: 5.835207, mean_absolute_error: 43.793808, mean_q: 87.903419
25609/50000: episode: 165, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.179 [-1.045, 1.356], loss: 7.437468, mean_absolute_error: 43.510075, mean_q: 87.227913
25809/50000: episode: 166, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.190 [-1.188, 1.273], loss: 3.556152, mean_absolute_error: 43.680809, mean_q: 87.912445
26009/50000: episode: 167, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.219 [-1.059, 1.151], loss: 6.095132, mean_absolute_error: 43.658768, mean_q: 87.734665
26209/50000: episode: 168, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.236 [-0.898, 1.170], loss: 3.370065, mean_absolute_error: 43.356461, mean_q: 87.286034
26409/50000: episode: 169, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.139 [-1.130, 1.281], loss: 5.408796, mean_absolute_error: 43.550106, mean_q: 87.425255
26609/50000: episode: 170, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.140 [-1.065, 1.193], loss: 7.299086, mean_absolute_error: 43.331287, mean_q: 86.905342
26809/50000: episode: 171, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.189 [-1.077, 1.163], loss: 7.550665, mean_absolute_error: 43.521717, mean_q: 87.372406
27009/50000: episode: 172, duration: 3.337s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.187 [-0.844, 0.986], loss: 8.801920, mean_absolute_error: 43.636620, mean_q: 87.609375
27209/50000: episode: 173, duration: 3.333s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.206 [-1.140, 1.188], loss: 6.645412, mean_absolute_error: 43.178970, mean_q: 86.496407
27409/50000: episode: 174, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.520 [0.000, 1.000], mean observation: 0.261 [-1.301, 1.827], loss: 5.043825, mean_absolute_error: 43.156704, mean_q: 86.606758
27609/50000: episode: 175, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.250 [-1.325, 1.541], loss: 5.164368, mean_absolute_error: 43.135971, mean_q: 86.587013
27809/50000: episode: 176, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.160 [-1.040, 1.278], loss: 4.280077, mean_absolute_error: 43.183594, mean_q: 86.713104
28009/50000: episode: 177, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.201 [-0.761, 1.125], loss: 7.115020, mean_absolute_error: 42.907814, mean_q: 86.099571
28209/50000: episode: 178, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.161 [-0.925, 0.930], loss: 5.515261, mean_absolute_error: 43.057552, mean_q: 86.407677
28409/50000: episode: 179, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.520 [0.000, 1.000], mean observation: 0.256 [-1.314, 1.511], loss: 6.872149, mean_absolute_error: 43.007149, mean_q: 86.258827
28609/50000: episode: 180, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.227 [-0.965, 1.350], loss: 5.596032, mean_absolute_error: 42.849926, mean_q: 86.027725
28809/50000: episode: 181, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.164 [-0.931, 0.971], loss: 4.870154, mean_absolute_error: 42.821621, mean_q: 86.016541
29009/50000: episode: 182, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.204 [-1.161, 1.471], loss: 5.618081, mean_absolute_error: 43.054581, mean_q: 86.422012
29209/50000: episode: 183, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.217 [-0.970, 1.153], loss: 5.332167, mean_absolute_error: 42.679234, mean_q: 85.839630
29409/50000: episode: 184, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.177 [-1.410, 1.274], loss: 7.773335, mean_absolute_error: 42.752365, mean_q: 85.852333
29609/50000: episode: 185, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.230 [-1.176, 1.343], loss: 5.642268, mean_absolute_error: 42.494751, mean_q: 85.254494
29809/50000: episode: 186, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.217 [-0.831, 1.028], loss: 5.922575, mean_absolute_error: 42.577755, mean_q: 85.470886
30009/50000: episode: 187, duration: 3.498s, episode steps: 200, steps per second: 57, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.236 [-1.049, 1.179], loss: 5.689015, mean_absolute_error: 42.729492, mean_q: 85.842262
30209/50000: episode: 188, duration: 1.115s, episode steps: 200, steps per second: 179, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.140 [-1.304, 1.352], loss: 4.919094, mean_absolute_error: 42.400517, mean_q: 85.219162
30409/50000: episode: 189, duration: 0.992s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.185 [-0.993, 1.300], loss: 8.317807, mean_absolute_error: 42.808750, mean_q: 85.811005
30609/50000: episode: 190, duration: 0.988s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.162 [-1.236, 1.136], loss: 6.853027, mean_absolute_error: 42.758446, mean_q: 85.739807
30809/50000: episode: 191, duration: 0.988s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.150 [-1.248, 1.331], loss: 6.204515, mean_absolute_error: 42.489212, mean_q: 85.330078
31009/50000: episode: 192, duration: 0.991s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.117 [-1.039, 0.966], loss: 4.764509, mean_absolute_error: 42.427677, mean_q: 85.335426
31209/50000: episode: 193, duration: 1.070s, episode steps: 200, steps per second: 187, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.215 [-1.465, 1.322], loss: 6.442283, mean_absolute_error: 42.353622, mean_q: 85.141289
31409/50000: episode: 194, duration: 1.050s, episode steps: 200, steps per second: 190, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.030 [-1.295, 1.400], loss: 7.751369, mean_absolute_error: 42.391006, mean_q: 84.900497
31609/50000: episode: 195, duration: 0.988s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.149 [-1.288, 1.287], loss: 3.777361, mean_absolute_error: 42.622257, mean_q: 85.564301
31809/50000: episode: 196, duration: 1.022s, episode steps: 200, steps per second: 196, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.157 [-1.115, 1.009], loss: 7.035195, mean_absolute_error: 42.505516, mean_q: 85.283798
32009/50000: episode: 197, duration: 1.020s, episode steps: 200, steps per second: 196, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.129 [-0.827, 0.861], loss: 5.009069, mean_absolute_error: 42.866982, mean_q: 86.164825
32209/50000: episode: 198, duration: 0.997s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.163 [-0.969, 1.108], loss: 6.226685, mean_absolute_error: 42.838303, mean_q: 85.988838
32409/50000: episode: 199, duration: 0.993s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.127 [-1.252, 1.386], loss: 7.430143, mean_absolute_error: 42.438431, mean_q: 85.119217
32609/50000: episode: 200, duration: 1.877s, episode steps: 200, steps per second: 107, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.168 [-1.242, 1.299], loss: 7.031561, mean_absolute_error: 42.620586, mean_q: 85.556641
32809/50000: episode: 201, duration: 1.960s, episode steps: 200, steps per second: 102, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.147 [-1.008, 0.984], loss: 7.330379, mean_absolute_error: 42.603550, mean_q: 85.444054
33009/50000: episode: 202, duration: 3.337s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.116 [-1.092, 0.910], loss: 7.340349, mean_absolute_error: 42.490467, mean_q: 85.269951
33209/50000: episode: 203, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.172 [-1.338, 1.146], loss: 5.301699, mean_absolute_error: 42.502686, mean_q: 85.343903
33409/50000: episode: 204, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.115 [-0.973, 1.116], loss: 6.097166, mean_absolute_error: 42.663502, mean_q: 85.625977
33609/50000: episode: 205, duration: 3.324s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.109 [-1.033, 0.934], loss: 7.296375, mean_absolute_error: 42.569210, mean_q: 85.351311
33809/50000: episode: 206, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.136 [-0.953, 0.862], loss: 8.543866, mean_absolute_error: 42.446793, mean_q: 84.996368
34009/50000: episode: 207, duration: 3.326s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.165 [-1.003, 1.149], loss: 7.525975, mean_absolute_error: 42.001942, mean_q: 84.264984
34209/50000: episode: 208, duration: 3.342s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.189 [-1.082, 1.134], loss: 5.697645, mean_absolute_error: 41.951153, mean_q: 84.207092
34409/50000: episode: 209, duration: 3.339s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.134 [-1.146, 1.186], loss: 5.117388, mean_absolute_error: 42.079453, mean_q: 84.511292
34609/50000: episode: 210, duration: 3.332s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.135 [-1.064, 1.092], loss: 7.106796, mean_absolute_error: 41.923141, mean_q: 84.088203
34809/50000: episode: 211, duration: 3.328s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.101 [-1.270, 1.112], loss: 5.929490, mean_absolute_error: 41.685123, mean_q: 83.752380
35009/50000: episode: 212, duration: 3.338s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.208 [-1.517, 1.483], loss: 3.712490, mean_absolute_error: 42.175907, mean_q: 84.549286
35209/50000: episode: 213, duration: 3.333s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.183 [-1.348, 1.367], loss: 7.126879, mean_absolute_error: 42.040630, mean_q: 84.034904
35409/50000: episode: 214, duration: 3.342s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: 0.149 [-1.146, 1.379], loss: 6.441104, mean_absolute_error: 41.516346, mean_q: 83.153275
35609/50000: episode: 215, duration: 4.949s, episode steps: 200, steps per second: 40, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.201 [-1.361, 1.643], loss: 5.780784, mean_absolute_error: 41.763515, mean_q: 83.625511
35809/50000: episode: 216, duration: 3.333s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.103 [-0.920, 1.052], loss: 5.081985, mean_absolute_error: 41.187489, mean_q: 82.494064
36009/50000: episode: 217, duration: 3.332s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.134 [-1.348, 1.199], loss: 5.297232, mean_absolute_error: 41.399727, mean_q: 83.079201
36209/50000: episode: 218, duration: 3.335s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.133 [-1.204, 1.405], loss: 6.443807, mean_absolute_error: 41.111160, mean_q: 82.424736
36409/50000: episode: 219, duration: 3.331s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.193 [-0.872, 1.136], loss: 6.352515, mean_absolute_error: 40.847176, mean_q: 81.930771
36609/50000: episode: 220, duration: 3.367s, episode steps: 200, steps per second: 59, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.158 [-0.962, 1.139], loss: 5.646825, mean_absolute_error: 41.139488, mean_q: 82.489235
36809/50000: episode: 221, duration: 3.334s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.177 [-1.056, 1.101], loss: 5.816448, mean_absolute_error: 40.826656, mean_q: 81.862366
37009/50000: episode: 222, duration: 3.331s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.134 [-1.060, 1.126], loss: 5.460950, mean_absolute_error: 40.779613, mean_q: 81.803101
37209/50000: episode: 223, duration: 3.402s, episode steps: 200, steps per second: 59, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: 0.178 [-1.065, 1.180], loss: 8.732397, mean_absolute_error: 40.484825, mean_q: 80.981766
37409/50000: episode: 224, duration: 3.615s, episode steps: 200, steps per second: 55, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.185 [-1.155, 1.122], loss: 4.804307, mean_absolute_error: 40.802700, mean_q: 81.889763
37609/50000: episode: 225, duration: 3.320s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.148 [-1.115, 1.372], loss: 6.825720, mean_absolute_error: 40.651947, mean_q: 81.445000
37809/50000: episode: 226, duration: 1.126s, episode steps: 200, steps per second: 178, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.142 [-1.136, 1.143], loss: 6.730220, mean_absolute_error: 40.191383, mean_q: 80.584053
38009/50000: episode: 227, duration: 0.994s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.091 [-0.955, 1.085], loss: 6.396078, mean_absolute_error: 40.215801, mean_q: 80.562454
38209/50000: episode: 228, duration: 0.984s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.075 [-0.943, 0.865], loss: 6.088786, mean_absolute_error: 40.172867, mean_q: 80.596992
38409/50000: episode: 229, duration: 0.995s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.112 [-0.812, 0.864], loss: 1.997710, mean_absolute_error: 40.250900, mean_q: 80.862282
38609/50000: episode: 230, duration: 1.001s, episode steps: 200, steps per second: 200, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.049 [-1.119, 1.127], loss: 6.160636, mean_absolute_error: 40.428856, mean_q: 80.986099
38809/50000: episode: 231, duration: 1.056s, episode steps: 200, steps per second: 189, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.074 [-0.863, 1.075], loss: 8.184495, mean_absolute_error: 40.187820, mean_q: 80.483635
39009/50000: episode: 232, duration: 0.996s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.044 [-1.204, 1.134], loss: 3.622871, mean_absolute_error: 40.255074, mean_q: 80.925217
39209/50000: episode: 233, duration: 0.980s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.137 [-0.929, 1.091], loss: 5.077471, mean_absolute_error: 40.321312, mean_q: 80.902649
39409/50000: episode: 234, duration: 1.084s, episode steps: 200, steps per second: 184, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.105 [-1.114, 1.135], loss: 6.608530, mean_absolute_error: 40.088284, mean_q: 80.393303
39609/50000: episode: 235, duration: 1.005s, episode steps: 200, steps per second: 199, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.051 [-1.248, 1.292], loss: 6.007174, mean_absolute_error: 40.184326, mean_q: 80.673035
39809/50000: episode: 236, duration: 0.987s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.107 [-0.895, 1.077], loss: 7.174881, mean_absolute_error: 40.035736, mean_q: 80.206253
40009/50000: episode: 237, duration: 0.985s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.119 [-0.957, 1.027], loss: 5.487797, mean_absolute_error: 40.059414, mean_q: 80.356056
40209/50000: episode: 238, duration: 0.982s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.059 [-0.967, 0.931], loss: 4.557930, mean_absolute_error: 40.064159, mean_q: 80.418556
40409/50000: episode: 239, duration: 0.989s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.125 [-1.383, 1.303], loss: 9.063901, mean_absolute_error: 39.989731, mean_q: 80.075722
40609/50000: episode: 240, duration: 0.994s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.098 [-1.210, 1.092], loss: 5.767336, mean_absolute_error: 39.736691, mean_q: 79.670441
40809/50000: episode: 241, duration: 0.998s, episode steps: 200, steps per second: 200, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.123 [-1.371, 1.511], loss: 5.479912, mean_absolute_error: 40.145885, mean_q: 80.428932
41009/50000: episode: 242, duration: 0.984s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.101 [-1.051, 1.090], loss: 6.235820, mean_absolute_error: 40.096878, mean_q: 80.272614
41209/50000: episode: 243, duration: 0.985s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.026 [-1.083, 1.113], loss: 7.943477, mean_absolute_error: 39.868481, mean_q: 79.821266
41409/50000: episode: 244, duration: 0.991s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.010 [-1.051, 1.182], loss: 3.675431, mean_absolute_error: 40.009277, mean_q: 80.275085
41609/50000: episode: 245, duration: 0.984s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.046 [-0.832, 1.010], loss: 9.291725, mean_absolute_error: 39.614330, mean_q: 79.211609
41809/50000: episode: 246, duration: 0.990s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.058 [-0.949, 1.051], loss: 5.863860, mean_absolute_error: 39.757206, mean_q: 79.708611
42009/50000: episode: 247, duration: 0.989s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.067 [-0.982, 0.995], loss: 6.449609, mean_absolute_error: 39.226017, mean_q: 78.642532
42209/50000: episode: 248, duration: 0.981s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.032 [-1.106, 0.982], loss: 3.354485, mean_absolute_error: 39.346268, mean_q: 79.033424
42409/50000: episode: 249, duration: 0.983s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.089 [-0.836, 1.165], loss: 6.333947, mean_absolute_error: 39.907608, mean_q: 80.091072
42609/50000: episode: 250, duration: 0.985s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.083 [-0.970, 1.426], loss: 7.548281, mean_absolute_error: 39.607533, mean_q: 79.240143
42809/50000: episode: 251, duration: 0.981s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.079 [-0.919, 1.047], loss: 4.923182, mean_absolute_error: 39.350220, mean_q: 78.874298
43009/50000: episode: 252, duration: 0.982s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.008 [-1.332, 1.231], loss: 6.567420, mean_absolute_error: 39.431835, mean_q: 78.989983
43209/50000: episode: 253, duration: 0.981s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.017 [-1.116, 1.366], loss: 6.480456, mean_absolute_error: 39.555004, mean_q: 79.125443
43409/50000: episode: 254, duration: 0.981s, episode steps: 200, steps per second: 204, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.015 [-1.075, 1.088], loss: 7.339999, mean_absolute_error: 39.301491, mean_q: 78.577507
43547/50000: episode: 255, duration: 116.753s, episode steps: 138, steps per second: 1, episode reward: 138.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.536 [0.000, 1.000], mean observation: 0.296 [-1.920, 3.030], loss: 9.098721, mean_absolute_error: 39.643921, mean_q: 79.179260
43747/50000: episode: 256, duration: 1.710s, episode steps: 200, steps per second: 117, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.054 [-1.494, 1.558], loss: 9.551335, mean_absolute_error: 39.396172, mean_q: 78.684990
43947/50000: episode: 257, duration: 2.118s, episode steps: 200, steps per second: 94, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.001 [-0.959, 0.961], loss: 7.882089, mean_absolute_error: 39.031338, mean_q: 77.961517
44147/50000: episode: 258, duration: 5.079s, episode steps: 200, steps per second: 39, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.022 [-1.090, 1.134], loss: 8.912285, mean_absolute_error: 38.973499, mean_q: 77.770729
44347/50000: episode: 259, duration: 2.591s, episode steps: 200, steps per second: 77, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: -0.002 [-1.348, 1.572], loss: 6.041744, mean_absolute_error: 39.190418, mean_q: 78.410736
44547/50000: episode: 260, duration: 3.336s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.480 [0.000, 1.000], mean observation: -0.298 [-1.899, 1.063], loss: 7.900136, mean_absolute_error: 38.742985, mean_q: 77.437988
44747/50000: episode: 261, duration: 3.349s, episode steps: 200, steps per second: 60, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.009 [-1.453, 1.051], loss: 8.264863, mean_absolute_error: 38.517262, mean_q: 77.212463
44947/50000: episode: 262, duration: 3.228s, episode steps: 200, steps per second: 62, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.045 [-1.325, 1.581], loss: 7.823361, mean_absolute_error: 38.459747, mean_q: 76.857193
45147/50000: episode: 263, duration: 1.147s, episode steps: 200, steps per second: 174, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.034 [-0.981, 0.949], loss: 6.672464, mean_absolute_error: 38.964336, mean_q: 77.963058
45347/50000: episode: 264, duration: 1.187s, episode steps: 200, steps per second: 168, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.043 [-1.103, 1.150], loss: 4.754271, mean_absolute_error: 38.648502, mean_q: 77.466698
45547/50000: episode: 265, duration: 1.103s, episode steps: 200, steps per second: 181, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.010 [-1.104, 1.294], loss: 8.511153, mean_absolute_error: 38.705173, mean_q: 77.498650
45747/50000: episode: 266, duration: 1.005s, episode steps: 200, steps per second: 199, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.031 [-1.644, 1.351], loss: 5.461424, mean_absolute_error: 38.530750, mean_q: 77.302589
45947/50000: episode: 267, duration: 1.033s, episode steps: 200, steps per second: 194, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.003 [-1.128, 0.978], loss: 4.853215, mean_absolute_error: 38.834965, mean_q: 77.845779
46147/50000: episode: 268, duration: 1.040s, episode steps: 200, steps per second: 192, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.025 [-1.087, 1.207], loss: 10.625805, mean_absolute_error: 38.911812, mean_q: 77.609161
46347/50000: episode: 269, duration: 1.010s, episode steps: 200, steps per second: 198, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.006 [-1.421, 1.249], loss: 7.743947, mean_absolute_error: 39.175140, mean_q: 78.304199
46547/50000: episode: 270, duration: 1.004s, episode steps: 200, steps per second: 199, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.485 [0.000, 1.000], mean observation: -0.219 [-1.767, 1.390], loss: 7.634665, mean_absolute_error: 38.735641, mean_q: 77.531990
46747/50000: episode: 271, duration: 1.063s, episode steps: 200, steps per second: 188, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.002 [-1.032, 0.812], loss: 6.030808, mean_absolute_error: 38.968929, mean_q: 77.933571
46947/50000: episode: 272, duration: 1.017s, episode steps: 200, steps per second: 197, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: -0.035 [-1.316, 1.467], loss: 5.820978, mean_absolute_error: 38.765678, mean_q: 77.549904
47147/50000: episode: 273, duration: 1.013s, episode steps: 200, steps per second: 197, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: 0.002 [-1.033, 1.087], loss: 8.830095, mean_absolute_error: 38.866894, mean_q: 77.763382
47347/50000: episode: 274, duration: 1.022s, episode steps: 200, steps per second: 196, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.009 [-1.360, 1.260], loss: 6.721583, mean_absolute_error: 39.033733, mean_q: 78.132927
47547/50000: episode: 275, duration: 1.008s, episode steps: 200, steps per second: 198, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.001 [-1.503, 1.708], loss: 9.698984, mean_absolute_error: 39.220840, mean_q: 78.268196
47747/50000: episode: 276, duration: 0.995s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.019 [-1.113, 0.924], loss: 5.459605, mean_absolute_error: 38.597931, mean_q: 77.380501
47947/50000: episode: 277, duration: 1.000s, episode steps: 200, steps per second: 200, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: -0.016 [-1.776, 1.701], loss: 7.921328, mean_absolute_error: 38.983219, mean_q: 78.157463
48147/50000: episode: 278, duration: 1.003s, episode steps: 200, steps per second: 199, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.026 [-1.103, 1.071], loss: 7.670476, mean_absolute_error: 39.056908, mean_q: 78.182556
48347/50000: episode: 279, duration: 1.031s, episode steps: 200, steps per second: 194, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.008 [-1.379, 1.420], loss: 12.260652, mean_absolute_error: 39.183327, mean_q: 78.317581
48547/50000: episode: 280, duration: 1.006s, episode steps: 200, steps per second: 199, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.016 [-0.975, 1.179], loss: 5.823016, mean_absolute_error: 39.145679, mean_q: 78.550179
48747/50000: episode: 281, duration: 0.988s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.505 [0.000, 1.000], mean observation: -0.008 [-1.141, 1.377], loss: 7.670513, mean_absolute_error: 39.428028, mean_q: 79.101784
48947/50000: episode: 282, duration: 0.986s, episode steps: 200, steps per second: 203, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.495 [0.000, 1.000], mean observation: 0.010 [-1.224, 1.426], loss: 6.572105, mean_absolute_error: 39.766102, mean_q: 79.782700
49147/50000: episode: 283, duration: 0.991s, episode steps: 200, steps per second: 202, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.510 [0.000, 1.000], mean observation: 0.011 [-1.096, 0.936], loss: 11.384002, mean_absolute_error: 39.836002, mean_q: 79.536385
49347/50000: episode: 284, duration: 0.994s, episode steps: 200, steps per second: 201, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: 0.040 [-1.139, 1.306], loss: 7.600483, mean_absolute_error: 39.287796, mean_q: 78.650002
49547/50000: episode: 285, duration: 1.001s, episode steps: 200, steps per second: 200, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.490 [0.000, 1.000], mean observation: 0.062 [-1.657, 1.546], loss: 9.537685, mean_absolute_error: 39.594921, mean_q: 79.310692
49747/50000: episode: 286, duration: 1.159s, episode steps: 200, steps per second: 173, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.515 [0.000, 1.000], mean observation: 0.072 [-0.906, 1.276], loss: 10.738833, mean_absolute_error: 39.665150, mean_q: 79.364563
49947/50000: episode: 287, duration: 1.027s, episode steps: 200, steps per second: 195, episode reward: 200.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.500 [0.000, 1.000], mean observation: 0.021 [-1.005, 1.086], loss: 11.605513, mean_absolute_error: 39.305538, mean_q: 78.426559
done, took 793.158 seconds
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-501a28be4ff4> in <module>()
2
3 # After training is done, we save the final weights.
----> 4 dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
NameError: name 'ENV_NAME' is not defined
In [ ]:
dqn.test(env, nb_episodes=5, visualize=True)
In [ ]:
Content source: ivukotic/ML_platform_tests
Similar notebooks: