In [1]:
from ddpg3 import *
%matplotlib inline


[2016-07-20 08:48:40,071] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])
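(The warning above appears to come from gym's scoreboard consistency check; 'AcrobotContinuous-v0' is presumably a custom environment registered by ddpg3, and the message is harmless for the MountainCar run below.)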

In [2]:
car = ddpg(environment='MountainCarContinuous-v0', noise_scale=1.0)
car.main()


[2016-07-20 08:48:40,100] Making new env: MountainCarContinuous-v0
state dim 2
action dim 1
using rescaling mean: Tensor("batch_mean:0", shape=(2,), dtype=float32)
using rescaling variance: Tensor("batch_variance:0", shape=(2,), dtype=float32)
[2016-07-20 08:48:41,944] Action '1.3135719745' is not contained within action space 'Box(1,)'.
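The action-space warning above is gym flagging that a noise-perturbed action (here 1.3135...) left the environment's Box(1,) bounds: MountainCarContinuous-v0 accepts actions in [-1, 1], and with noise_scale=1.0 the exploration noise can easily push the actor's output outside that range. How ddpg3 handles this isn't shown; a minimal sketch of the usual remedy, clipping to the action space before env.step(), with illustrative values:

import numpy as np

np.random.seed(0)
mu_s = np.array([0.4])                           # hypothetical actor output mu(s)
noise_scale = 1.0
noisy = mu_s + noise_scale * np.random.randn(1)  # exploration-perturbed action
clipped = np.clip(noisy, -1.0, 1.0)              # keep it inside Box(1,): [-1, 1]
print(noisy, clipped)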
[after each of the checkpoints below, the notebook also plots the current mu() policy and slices of the Q-function at fixed actions 0.0, 0.5, 1.0, 1.5 and 2.0; the inline figures are not preserved in this text export]
result after minibatch no. 0 : mean squared error: 1.0
result after minibatch no. 1000 : mean squared error: 0.0212438367307
result after minibatch no. 2000 : mean squared error: 0.0107317604125
result after minibatch no. 3000 : mean squared error: 0.0022920621559
result after minibatch no. 4000 : mean squared error: 0.00203390768729
result after minibatch no. 5000 : mean squared error: 0.00275427498855
result after minibatch no. 6000 : mean squared error: 0.00137558626011
result after minibatch no. 7000 : mean squared error: 0.00264958525077
result after minibatch no. 8000 : mean squared error: 0.000685755047016
result after minibatch no. 9000 : mean squared error: 0.0191002245992
result after minibatch no. 10000 : mean squared error: 0.00265004695393
result after minibatch no. 11000 : mean squared error: 0.00482380855829
result after minibatch no. 12000 : mean squared error: 0.00653684791178
result after minibatch no. 13000 : mean squared error: 0.00274541089311
result after minibatch no. 14000 : mean squared error: 0.0195048656315
result after minibatch no. 15000 : mean squared error: 0.00189072149806
result after minibatch no. 16000 : mean squared error: 0.0243048612028
result after minibatch no. 17000 : mean squared error: 0.02397223562
result after minibatch no. 18000 : mean squared error: 0.0556298345327
result after minibatch no. 19000 : mean squared error: 0.0266380440444
result after minibatch no. 20000 : mean squared error: 0.00253886543214
result after minibatch no. 21000 : mean squared error: 0.0153667377308
result after minibatch no. 22000 : mean squared error: 0.0281686633825
result after minibatch no. 23000 : mean squared error: 0.00698087830096
result after minibatch no. 24000 : mean squared error: 0.0042253723368
result after minibatch no. 25000 : mean squared error: 0.0189623720944
result after minibatch no. 26000 : mean squared error: 0.00263949437067
result after minibatch no. 27000 : mean squared error: 0.0375488623977
result after minibatch no. 28000 : mean squared error: 0.0309919118881
result after minibatch no. 29000 : mean squared error: 0.00277064321563
result after minibatch no. 30000 : mean squared error: 0.108498051763
result after minibatch no. 31000 : mean squared error: 0.0900608748198
result after minibatch no. 32000 : mean squared error: 0.0323974266648
result after minibatch no. 33000 : mean squared error: 0.179548367858
result after minibatch no. 34000 : mean squared error: 0.0286772530526
result after minibatch no. 35000 : mean squared error: 0.033247448504
result after minibatch no. 36000 : mean squared error: 0.00199680309743
result after minibatch no. 37000 : mean squared error: 0.032655403018
result after minibatch no. 38000 : mean squared error: 0.0156300999224
result after minibatch no. 39000 : mean squared error: 0.151042088866
result after minibatch no. 40000 : mean squared error: 0.00551175745204
result after minibatch no. 41000 : mean squared error: 0.00564322667196
result after minibatch no. 42000 : mean squared error: 0.0702347010374
result after minibatch no. 43000 : mean squared error: 0.00860916171223
result after minibatch no. 44000 : mean squared error: 0.0639006644487
result after minibatch no. 45000 : mean squared error: 0.0220170598477
result after minibatch no. 46000 : mean squared error: 0.0562079772353
result after minibatch no. 47000 : mean squared error: 0.0192162897438
result after minibatch no. 48000 : mean squared error: 0.0169645808637
result after minibatch no. 49000 : mean squared error: 0.0791811570525
result after minibatch no. 50000 : mean squared error: 0.00715968385339
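The critic's mean squared error drops sharply over the first few thousand minibatches and then fluctuates, which is expected: the regression targets move as the policy and the replay-buffer distribution change. For orientation, here is a minimal, self-contained sketch of what one DDPG minibatch update computes. It is not the ddpg3 implementation (whose internals are not shown here); it uses linear function approximators and made-up data, and every name in it is illustrative. The mse below is the analogue of the "mean squared error" printed every 1000 minibatches above.

import numpy as np

rng = np.random.default_rng(0)
state_dim, batch = 2, 32
gamma, tau, lr = 0.99, 1e-3, 1e-2

w_q = rng.normal(size=state_dim + 1)      # critic Q(s, a) = [s, a] . w_q
w_mu = rng.normal(size=state_dim)         # actor mu(s) = s . w_mu
w_q_t, w_mu_t = w_q.copy(), w_mu.copy()   # slow-moving target networks

# a fake replay-buffer minibatch (s, a, r, s')
s = rng.normal(size=(batch, state_dim))
a = rng.uniform(-1, 1, size=(batch, 1))
r = rng.normal(size=batch)
s2 = rng.normal(size=(batch, state_dim))

# critic target: y = r + gamma * Q'(s', mu'(s')) using the target networks
a2 = s2 @ w_mu_t
y = r + gamma * (np.hstack([s2, a2[:, None]]) @ w_q_t)

# critic update: minimize the mean squared TD error
x = np.hstack([s, a])
q = x @ w_q
mse = np.mean((q - y) ** 2)               # the quantity logged above
w_q -= lr * (2 / batch) * x.T @ (q - y)

# actor update: deterministic policy gradient, dQ/da * dmu/dtheta
dq_da = w_q[-1]                           # for a linear critic, dQ/da is a constant
w_mu += lr * dq_da * s.mean(axis=0)

# Polyak averaging of the target networks
w_q_t = tau * w_q + (1 - tau) * w_q_t
w_mu_t = tau * w_mu + (1 - tau) * w_mu_t
print("critic MSE on this minibatch:", mse)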
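The "plotting the mu() policy" and "plotting the Qfunction" lines mark where the notebook rendered figures: judging from the log, one Q(s, a) surface over the 2-D state space (position, velocity) per fixed action in {0.0, 0.5, 1.0, 1.5, 2.0}. A sketch of that kind of plot, assuming a callable q(states, action) in place of the learned critic (ddpg3's plotting code is not shown):

import numpy as np
import matplotlib.pyplot as plt

def plot_q_slices(q, actions=(0.0, 0.5, 1.0, 1.5, 2.0)):
    # q: hypothetical callable mapping (n, 2) states and a scalar action to
    # (n,) Q-values; the grid covers MountainCar's observation space
    pos = np.linspace(-1.2, 0.6, 60)
    vel = np.linspace(-0.07, 0.07, 60)
    P, V = np.meshgrid(pos, vel)
    states = np.stack([P.ravel(), V.ravel()], axis=1)
    fig, axes = plt.subplots(1, len(actions), figsize=(4 * len(actions), 3))
    for ax, a in zip(axes, actions):
        z = q(states, a).reshape(P.shape)
        im = ax.pcolormesh(P, V, z)
        ax.set_title("action %.1f" % a)
        ax.set_xlabel("position")
        ax.set_ylabel("velocity")
        fig.colorbar(im, ax=ax)
    plt.show()

# toy stand-in for the learned critic, just to make the sketch runnable
plot_q_slices(lambda s, a: -(s[:, 0] - 0.45) ** 2 + a * s[:, 1])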
