In [1]:
from mountaincar_nn import *
[2016-07-01 15:09:43,372] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])
In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
In [3]:
from train_mu_offline import *
In [4]:
# Build the offline trainer for the mu() policy network with the
# batch-normalisation flag switched on, then run the training loop.
# `mu_offline_training` arrives via the star import from train_mu_offline.
t1 = mu_offline_training(use_batchnorm=True)

# NOTE(review): this call is very chatty -- in the recorded run it prints the
# label vector, the MSE and a sample batch every 500 minibatches, and every
# state/action pair of a rollout under the learned policy.
t1.start_training()
[2016-07-01 15:10:02,267] Making new env: MountainCarContinuous-v0
moutaincar_dpg.py:375: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
policy_vals = np.zeros((resolution, resolution))
moutaincar_dpg.py:126: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
grid = np.zeros(np.ones(obs_dim)*self.tile_resolution)
action limits (array([ 0.]), array([ 2.]))
N_0 50.0
using environment MountainCarContinuous-v0
tile resolution 10.0
gamma 0.99
[ 1. 0.67835728 0.62276976 0.62209078 0.8526459 2.86599393
2.887229 1.02836613 1. 1. 0.93533027 -0.38809582
0.6045682 1.0698471 1.14473635 1.17997436 1.55615993 2.59707689
0.74289175 1. 0.44594718 0.10847247 0.71581806 0.92107525
1.09654035 1.23673655 1.43051834 1.04543336 0.61158972 1.
0.06202713 -0.14023047 0.46513577 -0.81585296 2.23711758 3.71232037
1.85801243 1.22226744 2.06895135 1.00617072 1.00455976 -0.18160138
0.04829047 0.23244138 -0.96749015 1.25475142 3.37298761 1.31216807
1.89625168 1.05661102 0.99939357 0.77621888 0.80518055 0.11962489
0.58241356 1.45858352 1.57595806 1.40786321 3.42616224 0.99983717
1. 1.04137001 0.27077842 0.42664555 0.81870769 0.94528861
1.1842359 1.43590492 1.54714103 1. 1. 1.
1.09332226 0.27452517 0.69129624 0.9348839 1.64996572 1.36786012
1.00271191 1. 1. 1. 1. 0.97979885
0.2292072 1.13224935 1.97769483 1.31925871 1. 1. 1.
1. 1. 1. 1.11007973 0.9828954 1.29067028
1.0021732 1. 1. ]
[20001, 12755, 10774, 10457, 3797, 4972, 3839, 963, 2246, 2066, 2250, 1670, 2426, 689, 622, 2121, 1355, 2652, 1379, 2178, 1830, 1402, 573, 799, 1009, 521, 1572, 610, 768, 842, 1365, 1334, 923, 2560, 830, 627, 1124, 370, 885, 1257, 808, 725, 911, 387, 407, 684, 485, 845, 694, 480, 638, 754, 430, 475, 540, 460, 514, 834, 407, 634, 472, 586, 716, 440, 563, 813, 526, 461, 576, 465, 405, 481, 280, 1150, 744, 464, 529, 320, 604, 423, 1011, 467, 535, 359, 456, 379, 290, 459, 442, 718, 479, 348, 513, 631, 370, 386, 374, 848, 533, 375, 357, 290, 295, 290, 318, 563, 305, 579, 320, 288, 497, 372, 285, 396, 483, 288, 288, 366, 376, 313, 419, 398, 295, 368, 370, 398, 510, 407, 474, 450, 276, 207, 304, 374, 358, 412, 262, 397, 374, 382, 379, 367, 288, 459, 287, 483, 287, 370, 389, 321, 371, 442, 286, 328, 282, 203, 291, 239, 315, 375, 287, 418, 278, 296, 455, 327, 289, 288, 511, 214, 281, 359, 392, 310, 239, 342, 380, 306, 339, 300, 242, 289, 295, 283, 298, 211, 376, 310, 347, 230, 232, 278, 219, 287, 314, 277, 286, 280, 295, 294, 330, 287, 280, 282, 231, 319, 301, 210, 315, 331, 291, 230, 363, 331, 322, 477, 324, 211, 232, 292, 279, 290, 170, 279, 292, 214, 286, 186, 303, 401, 279, 194, 290, 193, 228, 175, 289, 323, 283, 232, 373, 225, 201, 269, 275, 231, 295, 226, 277, 242, 206, 193, 291, 197, 195, 196, 285, 251, 376, 276, 199, 275, 235, 281, 244, 241, 291, 241, 304, 222, 203, 306, 268, 221, 202, 274, 297, 280, 367, 250, 245, 197, 224, 198, 188, 253, 307, 214, 204, 189, 160, 279, 212, 274, 272, 191]
[20001, 12755, 10774, 10457, 3797, 4972, 3839, 963, 2246, 2066, 2250, 1670, 2426, 689, 622, 2121, 1355, 2652, 1379, 2178, 1830, 1402, 573, 799, 1009, 521, 1572, 610, 768, 842, 1365, 1334, 923, 2560, 830, 627, 1124, 370, 885, 1257, 808, 725, 911, 387, 407, 684, 485, 845, 694, 480, 638, 754, 430, 475, 540, 460, 514, 834, 407, 634, 472, 586, 716, 440, 563, 813, 526, 461, 576, 465, 405, 481, 280, 1150, 744, 464, 529, 320, 604, 423, 1011, 467, 535, 359, 456, 379, 290, 459, 442, 718, 479, 348, 513, 631, 370, 386, 374, 848, 533, 375, 357, 290, 295, 290, 318, 563, 305, 579, 320, 288, 497, 372, 285, 396, 483, 288, 288, 366, 376, 313, 419, 398, 295, 368, 370, 398, 510, 407, 474, 450, 276, 207, 304, 374, 358, 412, 262, 397, 374, 382, 379, 367, 288, 459, 287, 483, 287, 370, 389, 321, 371, 442, 286, 328, 282, 203, 291, 239, 315, 375, 287, 418, 278, 296, 455, 327, 289, 288, 511, 214, 281, 359, 392, 310, 239, 342, 380, 306, 339, 300, 242, 289, 295, 283, 298, 211, 376, 310, 347, 230, 232, 278, 219, 287, 314, 277, 286, 280, 295, 294, 330, 287, 280, 282, 231, 319, 301, 210, 315, 331, 291, 230, 363, 331, 322, 477, 324, 211, 232, 292, 279, 290, 170, 279, 292, 214, 286, 186, 303, 401, 279, 194, 290, 193, 228, 175, 289, 323, 283, 232, 373, 225, 201, 269, 275, 231, 295, 226, 277, 242, 206, 193, 291, 197, 195, 196, 285, 251, 376, 276, 199, 275, 235, 281, 244, 241, 291, 241, 304, 222, 203, 306, 268, 221, 202, 274, 297, 280, 367, 250, 245, 197, 224, 198, 188, 253, 307, 214, 204, 189, 160, 279, 212, 274, 272, 191]
result after minibatch no. 500 : mean squared error: 0.497341305017
batch train data [[ -7.97164409e-01 -6.78202638e-02]
[ -1.00923210e+00 -3.47820658e-02]
[ -9.59849052e-01 -8.79590286e-03]
[ -5.89818509e-01 3.74665167e-02]
[ 3.06526705e-01 4.87798747e-02]
[ -1.93261024e-01 4.88378012e-03]
[ -1.02765046e+00 1.11913252e-03]
[ -5.25424805e-01 -2.97713053e-02]
[ 3.58121057e-01 5.33460586e-02]
[ 5.26561841e-02 4.49640692e-02]
[ -3.75407369e-01 -2.03657189e-03]
[ 3.83927301e-01 1.91929942e-02]
[ 5.53191610e-01 4.85076679e-02]
[ -3.31405717e-01 -5.42281491e-02]
[ 3.88307910e-01 2.72300885e-02]
[ -2.25038050e-01 3.01189749e-02]
[ -1.03577695e+00 -2.56944874e-02]
[ -1.05990452e+00 2.46364728e-02]
[ -9.26188034e-01 4.45322792e-02]
[ -4.70523233e-01 -1.04877740e-03]
[ -1.92730057e-01 -5.31900906e-02]
[ -1.02652257e+00 -4.36389696e-03]
[ 5.11426170e-01 -6.91379244e-02]
[ -2.48000718e-01 -4.82439432e-02]
[ -1.87445633e-01 2.56464248e-02]
[ -6.36712104e-01 -2.74465237e-02]
[ -6.68376881e-01 -4.97914406e-02]
[ -1.16804106e+00 5.66242696e-02]
[ -8.41452676e-01 7.34989186e-04]
[ -9.21266673e-02 1.99852477e-02]
[ -5.24420820e-01 4.28142203e-02]
[ -3.43590282e-01 6.01588649e-02]
[ 2.22275393e-01 -3.95295911e-02]
[ -6.74282452e-01 1.39011661e-02]
[ -2.23172247e-01 -1.66934710e-02]
[ 1.50741481e-01 6.50529415e-03]
[ -3.52083187e-01 2.73469175e-02]
[ 5.80841590e-01 -5.25672527e-02]
[ -1.16866629e+00 6.49670241e-02]
[ 2.56550856e-01 4.97620748e-02]
[ -1.04466312e-01 -1.42064218e-02]
[ -1.01346018e+00 5.95261913e-02]
[ -4.34582674e-01 1.74197090e-02]
[ 3.15240742e-01 5.22158460e-03]
[ -1.50714122e-01 4.07610836e-02]
[ -2.44219962e-01 -4.57716078e-02]
[ 4.60677308e-02 6.17687015e-02]
[ -6.58940797e-01 4.82888047e-02]
[ -4.80523862e-01 1.02803133e-02]
[ 3.32799371e-01 -6.44141100e-03]]
batch train labels [[ 0.44594718]
[ 0.6045682 ]
[ 1.14473635]
[ 1.22226744]
[ 1. ]
[ 1.45858352]
[ 2.86599393]
[ 0.46513577]
[ 1. ]
[ 1.54714103]
[-0.96749015]
[ 1.97769483]
[ 1. ]
[-0.18160138]
[ 1.97769483]
[ 1.40786321]
[ 0.62209078]
[ 2.887229 ]
[ 0.74289175]
[-0.96749015]
[ 0.77621888]
[ 0.8526459 ]
[ 1. ]
[ 0.77621888]
[ 1.57595806]
[-0.81585296]
[ 0.10847247]
[ 1. ]
[ 1.17997436]
[ 1.1842359 ]
[ 2.06895135]
[ 1.05661102]
[ 1.09332226]
[ 1.23673655]
[ 0.11962489]
[ 0.9348839 ]
[ 3.37298761]
[ 1. ]
[ 1. ]
[ 1. ]
[ 0.42664555]
[ 1. ]
[ 3.37298761]
[ 1.13224935]
[ 1.40786321]
[ 0.77621888]
[ 1. ]
[ 2.06895135]
[ 3.71232037]
[ 0.2292072 ]]
plotting the mu() policy learned by NN
[2016-07-01 15:10:13,466] Observation '[[ -4.89154845e-01]
[ 1.97596994e-04]]' is not contained within observation space 'Box(2,)'.
state [-0.48935245 0. ]
[ 1.45399284]
state [[ -4.89154845e-01]
[ 1.97596994e-04]]
[ 1.4676019]
state [[ -4.88747507e-01]
[ 4.07329004e-04]]
[ 1.4812485]
state [[-0.48811984]
[ 0.00062767]]
[ 1.49477792]
state [[-0.48726299]
[ 0.00085686]]
[ 1.508026]
state [[-0.48617008]
[ 0.0010929 ]]
[ 1.52082062]
state [[-0.48483649]
[ 0.0013336 ]]
[ 1.53298128]
state [[-0.48325998]
[ 0.00157652]]
[ 1.54432225]
state [[-0.48144093]
[ 0.00181904]]
[ 1.5532347]
state [[-0.47938401]
[ 0.00205693]]
[ 1.56076872]
state [[-0.47709695]
[ 0.00228706]]
[ 1.56671083]
state [[-0.47459081]
[ 0.00250614]]
[ 1.56950462]
state [[-0.47188139]
[ 0.00270941]]
[ 1.57028186]
state [[-0.46898803]
[ 0.00289336]]
[ 1.56803358]
state [[-0.4659344 ]
[ 0.00305364]]
[ 1.56366849]
state [[-0.46274742]
[ 0.00318698]]
[ 1.55711639]
state [[-0.45945719]
[ 0.00329025]]
[ 1.54832828]
state [[-0.45609671]
[ 0.00336047]]
[ 1.53727937]
state [[-0.45270178]
[ 0.00339494]]
[ 1.5247457]
state [[-0.44930983]
[ 0.00339196]]
[ 1.51158309]
state [[-0.44595885]
[ 0.00335098]]
[ 1.49586892]
state [[-0.44268906]
[ 0.0032698 ]]
[ 1.47761428]
state [[-0.43954253]
[ 0.00314653]]
[ 1.45684493]
state [[-0.43656293]
[ 0.00297961]]
[ 1.43361759]
state [[-0.43379506]
[ 0.00276785]]
[ 1.40808654]
state [[-0.43128455]
[ 0.00251053]]
[ 1.37978375]
state [[-0.42907777]
[ 0.00220677]]
[ 1.34626341]
state [[-0.42722419]
[ 0.00185359]]
[ 1.31039929]
state [[-0.42577296]
[ 0.00145121]]
[ 1.27324867]
state [[-0.42477173]
[ 0.00100125]]
[ 1.23588383]
state [[-0.42426497]
[ 0.00050674]]
[ 1.19430661]
state [[ -4.24297929e-01]
[ -3.29740578e-05]]
[ 1.15108645]
state [[-0.42491361]
[-0.00061568]]
[ 1.10717356]
state [[-0.42615148]
[-0.00123788]]
[ 1.06303549]
state [[-0.42804682]
[-0.00189533]]
[ 1.01918089]
state [[-0.43062985]
[-0.00258302]]
[ 0.97615647]
state [[-0.43392497]
[-0.00329512]]
[ 0.93454462]
state [[-0.43795002]
[-0.00402505]]
[ 0.89514959]
state [[-0.44271523]
[-0.00476522]]
[ 0.86080933]
state [[-0.44822031]
[-0.0055051 ]]
[ 0.83099198]
state [[-0.45445496]
[-0.00623464]]
[ 0.80637974]
state [[-0.46139809]
[-0.00694313]]
[ 0.78528816]
state [[-0.46901974]
[-0.00762164]]
[ 0.7648173]
state [[-0.47728407]
[-0.00826434]]
[ 0.7502116]
state [[-0.48614445]
[-0.00886037]]
[ 0.74114555]
state [[-0.49554399]
[-0.00939955]]
[ 0.73564947]
state [[-0.50541806]
[-0.00987406]]
[ 0.73742157]
state [[-0.51569098]
[-0.01027292]]
[ 0.7460441]
state [[-0.52627718]
[-0.01058618]]
[ 0.76150888]
state [[-0.53708178]
[-0.01080459]]
[ 0.78411072]
state [[-0.54800117]
[-0.01091938]]
[ 0.81289232]
state [[-0.55892479]
[-0.01092363]]
[ 0.84898943]
state [[-0.56973499]
[-0.0108102 ]]
[ 0.89078784]
state [[-0.58030951]
[-0.01057449]]
[ 0.93847668]
state [[-0.59052223]
[-0.01021273]]
[ 0.99479914]
state [[-0.6002416 ]
[-0.00971937]]
[ 1.06095719]
state [[-0.60933024]
[-0.00908864]]
[ 1.1350987]
state [[-0.61764789]
[-0.00831763]]
[ 1.21103966]
state [[-0.62505841]
[-0.00741054]]
[ 1.28594661]
state [[-0.63143373]
[-0.00637534]]
[ 1.35643566]
state [[-0.63665789]
[-0.00522418]]
[ 1.41823077]
state [[-0.64063203]
[-0.00397417]]
[ 1.48050356]
state [[-0.64326584]
[-0.00263384]]
[ 1.54354775]
state [[-0.64447778]
[-0.00121194]]
[ 1.60305083]
state [[ -6.44199848e-01]
[ 2.77962885e-04]]
[ 1.66914368]
state [[-0.64236784]
[ 0.00183201]]
[ 1.74117267]
state [[-0.63892263]
[ 0.00344522]]
[ 1.81520116]
state [[-0.63381445]
[ 0.0051082 ]]
[ 1.88258195]
state [[-0.62701201]
[ 0.00680242]]
[ 1.94709766]
state [[-0.61849928]
[ 0.00851273]]
[ 1.99845946]
state [[-0.6082859 ]
[ 0.01021337]]
[ 2.02001953]
state [[-0.59644419]
[ 0.01184171]]
[ 2.03985977]
state [[-0.5830605 ]
[ 0.01338372]]
[ 2.061131]
state [[-0.56823319]
[ 0.01482732]]
[ 2.08677554]
state [[-0.55207211]
[ 0.01616108]]
[ 2.11229753]
state [[-0.53469777]
[ 0.01737437]]
[ 2.12007475]
state [[-0.51624018]
[ 0.01845759]]
[ 2.12015772]
state [[-0.49683776]
[ 0.01940241]]
[ 2.11862659]
state [[-0.47663584]
[ 0.02020192]]
[ 2.11900949]
state [[-0.45578498]
[ 0.02085086]]
[ 2.12339568]
state [[-0.43443921]
[ 0.02134576]]
[ 2.13058949]
state [[-0.41275421]
[ 0.02168501]]
[ 2.1379447]
state [[-0.39088529]
[ 0.02186891]]
[ 2.11188602]
state [[-0.36898565]
[ 0.02189965]]
[ 2.07888842]
state [[-0.34720448]
[ 0.02178118]]
[ 2.02200246]
state [[-0.32568535]
[ 0.02151911]]
[ 1.96119642]
state [[-0.3046037 ]
[ 0.02108165]]
[ 1.88524187]
state [[-0.28416365]
[ 0.02044006]]
[ 1.79912448]
state [[-0.26456976]
[ 0.01959391]]
[ 1.710392]
state [[-0.24601847]
[ 0.01855129]]
[ 1.63184869]
state [[-0.22868478]
[ 0.01733369]]
[ 1.55272114]
state [[-0.21273275]
[ 0.01595203]]
[ 1.48924494]
state [[-0.19829939]
[ 0.01443335]]
[ 1.42644298]
state [[-0.18551011]
[ 0.01278928]]
[ 1.34886146]
state [[-0.17449471]
[ 0.01101541]]
[ 1.27954304]
state [[-0.16536497]
[ 0.00912974]]
[ 1.20916033]
state [[-0.15822469]
[ 0.00714028]]
[ 1.1254828]
state [[-0.15318252]
[ 0.00504216]]
[ 1.04072833]
state [[-0.15034026]
[ 0.00284226]]
[ 0.95400673]
state [[-0.149794 ]
[ 0.00054626]]
[ 0.85403168]
state [[-0.1516455]
[-0.0018515]]
[ 0.74172854]
state [[-0.15600099]
[-0.0043555 ]]
[ 0.62716573]
state [[-0.1629605 ]
[-0.00695951]]
[ 0.49289092]
state [[-0.17263426]
[-0.00967376]]
[ 0.36739182]
state [[-0.18511277]
[-0.01247852]]
[ 0.27979702]
state [[-0.2004358 ]
[-0.01532303]]
[ 0.25043076]
state [[-0.21856989]
[-0.01813409]]
[ 0.24488688]
state [[-0.23944063]
[-0.02087074]]
[ 0.27740929]
state [[-0.26291624]
[-0.02347561]]
[ 0.29812339]
state [[-0.28885555]
[-0.02593932]]
[ 0.30250067]
state [[-0.31711099]
[-0.02825544]]
[ 0.29809031]
state [[-0.34751981]
[-0.03040884]]
[ 0.26403618]
state [[-0.37992465]
[-0.03240484]]
[ 0.21001799]
state [[-0.41416398]
[-0.03423932]]
[ 0.14926517]
state [[-0.45006013]
[-0.03589615]]
[ 0.11170115]
state [[-0.48739165]
[-0.03733153]]
[ 0.12955034]
state [[-0.52586466]
[-0.038473 ]]
[ 0.18821153]
state [[-0.56513244]
[-0.03926779]]
[ 0.26281732]
state [[-0.60482669]
[-0.03969428]]
[ 0.31775868]
state [[-0.64460003]
[-0.03977332]]
[ 0.36201727]
state [[-0.68412364]
[-0.03952359]]
[ 0.40595961]
state [[-0.72308332]
[-0.03895969]]
[ 0.46113184]
state [[-0.76117349]
[-0.03809015]]
[ 0.50612468]
state [[-0.79812276]
[-0.03694928]]
[ 0.54107523]
state [[-0.83369702]
[-0.03557426]]
[ 0.5794608]
state [[-0.86768734]
[-0.03399031]]
[ 0.61941868]
state [[-0.89991206]
[-0.03222473]]
[ 0.6620872]
state [[-0.93021482]
[-0.03030275]]
[ 0.69497317]
state [[-0.95847499]
[-0.02826016]]
[ 0.71488827]
state [[-0.98460829]
[-0.0261333 ]]
[ 0.72271246]
state [[-1.0085628 ]
[-0.02395453]]
[ 0.72426713]
state [[-1.0303098 ]
[-0.02174704]]
[ 0.73960996]
state [[-1.04982042]
[-0.01951064]]
[ 0.76504427]
state [[-1.06706607]
[-0.01724567]]
[ 0.79581082]
state [[-1.0820204]
[-0.0149543]]
[ 0.82765263]
state [[-1.09466064]
[-0.01264028]]
[ 0.85975057]
state [[-1.10496652]
[-0.01030583]]
[ 0.8948698]
state [[-1.11291492]
[-0.00794841]]
[ 0.95584047]
state [[-1.11845589]
[-0.00554099]]
[ 1.04231954]
state [[-1.12151146]
[-0.00305558]]
[ 1.14815998]
state [[ -1.12198079e+00]
[ -4.69294842e-04]]
[ 1.26677179]
state [[-1.11974597]
[ 0.00223482]]
[ 1.40466595]
state [[-1.11466551]
[ 0.00508051]]
[ 1.54654884]
state [[-1.10658944]
[ 0.00807603]]
[ 1.69615591]
state [[-1.09535682]
[ 0.0112326 ]]
[ 1.82103276]
state [[-1.08082926]
[ 0.01452759]]
[ 1.91005015]
state [[-1.06290436]
[ 0.01792493]]
[ 1.96082079]
state [[-1.04152143]
[ 0.02138297]]
[ 1.93498993]
state [[-1.01670384]
[ 0.0248176 ]]
[ 1.87701082]
state [[-0.98851967]
[ 0.02818416]]
[ 1.76854169]
state [[-0.95710564]
[ 0.03141406]]
[ 1.65950978]
state [[-0.9226228 ]
[ 0.03448281]]
[ 1.55372369]
state [[-0.88525885]
[ 0.03736397]]
[ 1.45910609]
state [[-0.84522504]
[ 0.04003381]]
[ 1.36636269]
state [[-0.8027699 ]
[ 0.04245513]]
[ 1.27794611]
state [[-0.75817937]
[ 0.04459053]]
[ 1.26264024]
state [[-0.71170849]
[ 0.04647085]]
[ 1.28282201]
state [[-0.66361767]
[ 0.0480908 ]]
[ 1.33445764]
state [[-0.61417288]
[ 0.04944479]]
[ 1.39154661]
state [[-0.56366557]
[ 0.05050731]]
[ 1.44904864]
state [[-0.51240945]
[ 0.05125614]]
[ 1.50300848]
state [[-0.46073422]
[ 0.05167524]]
[ 1.52589905]
state [[-0.40900177]
[ 0.05173245]]
[ 1.57586896]
state [[-0.35753611]
[ 0.05146567]]
[ 1.68121815]
state [[-0.30658382]
[ 0.0509523 ]]
[ 1.78724825]
state [[-0.25635931]
[ 0.05022451]]
[ 1.86548579]
state [[-0.2070657 ]
[ 0.04929361]]
[ 1.90897322]
state [[-0.15889607]
[ 0.04816963]]
[ 1.87446404]
state [[-0.11207328]
[ 0.04682279]]
[ 1.75029409]
state [[-0.06686021]
[ 0.04521307]]
[ 1.59914041]
state [[-0.02349788]
[ 0.04336233]]
[ 1.48269701]
state [[ 0.01785335]
[ 0.04135124]]
[ 1.43003273]
state [[ 0.05713821]
[ 0.03928486]]
[ 1.39204776]
state [[ 0.09435175]
[ 0.03721354]]
[ 1.3833642]
state [[ 0.12954815]
[ 0.03519639]]
[ 1.39972341]
state [[ 0.16283071]
[ 0.03328256]]
[ 1.42482388]
state [[ 0.19433048]
[ 0.03149978]]
[ 1.45190954]
state [[ 0.22419512]
[ 0.02986464]]
[ 1.47354007]
state [[ 0.25257778]
[ 0.02838264]]
[ 1.48035944]
state [[ 0.27962479]
[ 0.02704701]]
[ 1.47312772]
state [[ 0.30547419]
[ 0.02584939]]
[ 1.46018779]
state [[ 0.33026212]
[ 0.02478792]]
[ 1.44439828]
state [[ 0.35412437]
[ 0.02386224]]
[ 1.41477597]
state [[ 0.37718439]
[ 0.02306001]]
[ 1.38616776]
state [[ 0.39956743]
[ 0.02238305]]
[ 1.36028767]
state [[ 0.42140183]
[ 0.02183442]]
[ 1.33634007]
state [[ 0.44281805]
[ 0.02141623]]
[ 1.31379545]
state [[ 0.46394813]
[ 0.02113008]]
[ 1.29190838]
state [[ 0.48492512]
[ 0.020977 ]]
[ 1.27145255]
episode length using learned policy: 189
result after minibatch no. 1000 : mean squared error: 0.198283180594
batch train data [[ 0.19408099 0.01538069]
[-0.26362413 0.06752013]
[-0.69951605 -0.05385464]
[-0.54738126 0.06748359]
[-0.21778464 -0.04865082]
[ 0.54335574 -0.00828046]
[ 0.16748455 0.00280113]
[-0.06892529 -0.00844221]
[-0.10153063 -0.05968317]
[-0.41294398 -0.0579001 ]
[-0.66986299 0.03172919]
[-0.0586107 0.05916397]
[-0.08222334 -0.0135419 ]
[-0.90261604 -0.06364673]
[-0.97258324 0.06179676]
[ 0.19117184 0.03567195]
[-0.0080051 0.0375999 ]
[-0.64827353 0.01346781]
[-0.88261342 -0.00587628]
[-0.75137338 -0.04300917]
[-0.8687 0.03963439]
[-0.22868821 0.00709976]
[-1.13098404 0.06483648]
[-0.74003207 -0.05979572]
[-0.57187377 0.04138672]
[-0.29682503 -0.00259221]
[-0.66917254 -0.03213424]
[ 0.07403914 0.06043526]
[-0.70054166 0.03214374]
[-1.0183053 0.02693183]
[-0.99503129 -0.01189531]
[-0.90651716 -0.02915869]
[-0.35015333 -0.01805869]
[ 0.52430298 -0.00194499]
[-0.57880194 0.0648794 ]
[ 0.49329809 0.05705563]
[ 0.22506749 0.02533672]
[-0.02199518 -0.0513784 ]
[ 0.35352841 -0.03391049]
[-1.05622524 -0.05788412]
[ 0.05851025 -0.01808364]
[-0.90433694 0.03211061]
[ 0.18065786 0.02464932]
[-0.82108 0.00553891]
[-0.70791267 0.02753616]
[ 0.52870181 -0.03133109]
[-0.20636319 -0.038933 ]
[-0.84686268 0.03173926]
[ 0.22102241 -0.00582173]
[-0.29220623 -0.03172117]]
batch train labels [[ 1.64996572]
[ 0.99983717]
[ 0.10847247]
[ 1.00617072]
[ 0.77621888]
[ 1.11007973]
[ 0.9348839 ]
[ 0.81870769]
[ 1. ]
[ 1.00455976]
[ 1.04543336]
[ 1. ]
[ 0.81870769]
[ 0.93533027]
[ 1. ]
[ 1.36786012]
[ 1.43590492]
[ 3.71232037]
[ 1.14473635]
[ 0.10847247]
[ 2.59707689]
[ 1.45858352]
[ 1. ]
[ 0.44594718]
[ 1.22226744]
[ 0.58241356]
[ 0.71581806]
[ 1. ]
[ 1.04543336]
[ 1.55615993]
[ 1.14473635]
[ 0.6045682 ]
[ 0.23244138]
[ 1.11007973]
[ 1.00617072]
[ 1. ]
[ 1.64996572]
[ 1.04137001]
[ 1. ]
[ 1. ]
[ 0.42664555]
[ 2.59707689]
[ 1.64996572]
[ 1.23673655]
[ 1.43051834]
[ 1. ]
[ 0.80518055]
[ 2.59707689]
[ 0.69129624]
[ 0.80518055]]
plotting the mu() policy learned by NN
state [-0.400759 0. ]
[ 0.8454169]
state [[-0.40181419]
[-0.00105517]]
[ 0.74972928]
state [[-0.40401283]
[-0.00219864]]
[ 0.65955544]
state [[-0.4074297 ]
[-0.00341686]]
[ 0.56788349]
state [[-0.41213241]
[-0.00470272]]
[ 0.47617376]
state [[-0.41817945]
[-0.00604705]]
[ 0.38765293]
state [[-0.42561638]
[-0.00743693]]
[ 0.31512311]
state [[-0.43446252]
[-0.00884613]]
[ 0.24732837]
state [[-0.44472191]
[-0.01025939]]
[ 0.18647118]
state [[-0.4563809 ]
[-0.01165899]]
[ 0.1339272]
state [[-0.46940669]
[-0.01302578]]
[ 0.09544826]
state [[-0.48374167]
[-0.01433499]]
[ 0.06162274]
state [[-0.49931327]
[-0.01557158]]
[ 0.05248022]
state [[-0.51601434]
[-0.01670108]]
[ 0.06062867]
state [[-0.53371167]
[-0.01769733]]
[ 0.08152326]
state [[-0.55225164]
[-0.01853998]]
[ 0.11243049]
state [[-0.57146454]
[-0.01921291]]
[ 0.15268403]
state [[-0.59116703]
[-0.01970247]]
[ 0.20369369]
state [[-0.61116248]
[-0.01999548]]
[ 0.26516563]
state [[-0.63124359]
[-0.02008111]]
[ 0.33433223]
state [[-0.65119702]
[-0.0199534 ]]
[ 0.40573651]
state [[-0.67081088]
[-0.01961388]]
[ 0.47530657]
state [[-0.68988091]
[-0.01907002]]
[ 0.5444206]
state [[-0.70821047]
[-0.01832957]]
[ 0.61750972]
state [[-0.72560763]
[-0.01739718]]
[ 0.69116521]
state [[-0.74188966]
[-0.016282 ]]
[ 0.75286794]
state [[-0.75689614]
[-0.01500648]]
[ 0.82314968]
state [[-0.77046913]
[-0.013573 ]]
[ 0.88408768]
state [[-0.78247118]
[-0.01200206]]
[ 0.93808472]
state [[-0.79278296]
[-0.0103118 ]]
[ 0.99054658]
state [[-0.80129772]
[-0.00851476]]
[ 1.05511117]
state [[-0.80790734]
[-0.0066096 ]]
[ 1.12503552]
state [[-0.81250888]
[-0.00460155]]
[ 1.19931185]
state [[-0.8150056]
[-0.0024967]]
[ 1.28484249]
state [[ -8.15299869e-01]
[ -2.94243684e-04]]
[ 1.36569667]
state [[-0.81330937]
[ 0.00199048]]
[ 1.44791555]
state [[-0.80896157]
[ 0.00434781]]
[ 1.53157282]
state [[-0.802194 ]
[ 0.0067676]]
[ 1.62334299]
state [[-0.79294848]
[ 0.0092455 ]]
[ 1.71142864]
state [[-0.7811842 ]
[ 0.01176428]]
[ 1.78872216]
state [[-0.76688594]
[ 0.01429828]]
[ 1.8229239]
state [[-0.75009781]
[ 0.01678812]]
[ 1.81457436]
state [[-0.73092413]
[ 0.0191737 ]]
[ 1.77558672]
state [[-0.70951825]
[ 0.02140589]]
[ 1.74696541]
state [[-0.68604219]
[ 0.02347607]]
[ 1.73255277]
state [[-0.66066289]
[ 0.0253793 ]]
[ 1.70939827]
state [[-0.63357496]
[ 0.02708795]]
[ 1.71081376]
state [[-0.60496628]
[ 0.02860871]]
[ 1.73272872]
state [[-0.57502061]
[ 0.02994565]]
[ 1.7695924]
state [[-0.54392123]
[ 0.03109938]]
[ 1.8160634]
state [[-0.51185346]
[ 0.03206776]]
[ 1.86594677]
state [[-0.47900781]
[ 0.03284564]]
[ 1.90997207]
state [[-0.44558564]
[ 0.03342218]]
[ 1.9492389]
state [[-0.41179401]
[ 0.03379164]]
[ 1.97910154]
state [[-0.37784615]
[ 0.03394784]]
[ 1.99951208]
state [[-0.34395742]
[ 0.03388871]]
[ 2.04470634]
state [[-0.31035173]
[ 0.03360568]]
[ 2.10001659]
state [[-0.27723852]
[ 0.03311321]]
[ 2.10783911]
state [[-0.24480933]
[ 0.03242919]]
[ 2.00920987]
state [[-0.21323568]
[ 0.03157365]]
[ 1.88880587]
state [[-0.18277889]
[ 0.03045678]]
[ 1.75638711]
state [[-0.1536992 ]
[ 0.02907969]]
[ 1.60263145]
state [[-0.1262558 ]
[ 0.02744341]]
[ 1.47888064]
state [[-0.10065631]
[ 0.02559949]]
[ 1.40678418]
state [[-0.07703692]
[ 0.02361939]]
[ 1.36455178]
state [[-0.05548651]
[ 0.02155041]]
[ 1.35784924]
state [[-0.0360437 ]
[ 0.01944281]]
[ 1.33502984]
state [[-0.01875125]
[ 0.01729244]]
[ 1.29567146]
state [[-0.00365918]
[ 0.01509207]]
[ 1.24375355]
state [[ 0.00917679]
[ 0.01283597]]
[ 1.19453537]
state [[ 0.01970825]
[ 0.01053146]]
[ 1.12120318]
state [[ 0.02786528]
[ 0.00815703]]
[ 1.02793002]
state [[ 0.03355896]
[ 0.00569369]]
[ 0.92969298]
state [[ 0.03669501]
[ 0.00313604]]
[ 0.82264668]
state [[ 0.03716883]
[ 0.00047382]]
[ 0.71289003]
state [[ 0.03487106]
[-0.00229776]]
[ 0.61176306]
state [[ 0.02969873]
[-0.00517233]]
[ 0.53471714]
state [[ 0.02157103]
[-0.0081277 ]]
[ 0.4931739]
state [[ 0.01044174]
[-0.01112929]]
[ 0.47299272]
state [[-0.00371333]
[-0.01415507]]
[ 0.45618373]
state [[-0.02091207]
[-0.01719873]]
[ 0.43597105]
state [[-0.04116991]
[-0.02025785]]
[ 0.41978681]
state [[-0.06448893]
[-0.02331902]]
[ 0.41140181]
state [[-0.0908499 ]
[-0.02636097]]
[ 0.43993157]
state [[-0.12017865]
[-0.02932876]]
[ 0.47995979]
state [[-0.15236673]
[-0.03218807]]
[ 0.51214409]
state [[-0.18728599]
[-0.03491927]]
[ 0.51103419]
state [[-0.22480989]
[-0.0375239 ]]
[ 0.49400508]
state [[-0.26479244]
[-0.03998255]]
[ 0.42859548]
state [[-0.30709821]
[-0.04230578]]
[ 0.35510159]
state [[-0.35156086]
[-0.04446265]]
[ 0.29218289]
state [[-0.3979651 ]
[-0.04640423]]
[ 0.20603457]
state [[-0.4460834]
[-0.0481183]]
[ 0.13115722]
state [[-0.49564669]
[-0.04956328]]
[ 0.05594841]
state [[-0.54636341]
[-0.05071673]]
[ 0.02568084]
state [[-0.59788388]
[-0.05152044]]
[ 0.04049608]
state [[-0.64981127]
[-0.05192741]]
[ 0.12528047]
state [[-0.70168924]
[-0.05187799]]
[ 0.19046049]
state [[-0.75310373]
[-0.0514145 ]]
[ 0.25736794]
state [[-0.80367237]
[-0.05056865]]
[ 0.30408436]
state [[-0.85307497]
[-0.04940259]]
[ 0.35497862]
state [[-0.90103465]
[-0.0479597 ]]
[ 0.3920157]
state [[-0.94733888]
[-0.0463042 ]]
[ 0.4557811]
state [[-0.99179864]
[-0.04445977]]
[ 0.51783186]
state [[-1.03427505]
[-0.04247638]]
[ 0.56992829]
state [[-1.07468343]
[-0.04040833]]
[ 0.6156922]
state [[-1.11298454]
[-0.03830113]]
[ 0.64913231]
state [[-1.14918506]
[-0.03620053]]
[ 0.66365087]
state [[-1.18333805]
[-0.03415298]]
[ 0.67717147]
state [-1.2 0. ]
[ 1.7080965]
state [[-1.19705009]
[ 0.00294999]]
[ 1.87566102]
state [[-1.19097281]
[ 0.00607725]]
[ 2.04678702]
state [[-1.18162453]
[ 0.00934828]]
[ 2.21445584]
state [[-1.16897678]
[ 0.01264773]]
[ 2.33149314]
state [[-1.15299404]
[ 0.01598274]]
[ 2.36781287]
state [[-1.13363612]
[ 0.01935787]]
[ 2.30819535]
state [[-1.11086178]
[ 0.02277428]]
[ 2.20528436]
state [[-1.08463299]
[ 0.02622882]]
[ 2.06964922]
state [[-1.05491996]
[ 0.02971307]]
[ 1.91466725]
state [[-1.02179289]
[ 0.03312707]]
[ 1.73796284]
state [[-0.98543513]
[ 0.03635778]]
[ 1.5900197]
state [[-0.94603014]
[ 0.039405 ]]
[ 1.48336959]
state [[-0.90375602]
[ 0.04227411]]
[ 1.38340771]
state [[-0.8588264]
[ 0.0449296]]
[ 1.27272677]
state [[-0.81151277]
[ 0.04731365]]
[ 1.19182146]
state [[-0.7621066 ]
[ 0.04940616]]
[ 1.17314494]
state [[-0.71088725]
[ 0.05121934]]
[ 1.22188699]
state [[-0.65811414]
[ 0.05277314]]
[ 1.30223596]
state [[-0.60405707]
[ 0.05405708]]
[ 1.3824203]
state [[-0.54901999]
[ 0.0550371 ]]
[ 1.45421827]
state [[-0.4933382 ]
[ 0.05568179]]
[ 1.49174392]
state [[-0.43739131]
[ 0.05594689]]
[ 1.53892195]
state [[-0.38154486]
[ 0.05584644]]
[ 1.58752894]
state [[-0.32614434]
[ 0.05540052]]
[ 1.67884076]
state [[-0.27146077]
[ 0.05468356]]
[ 1.79155445]
state [[-0.21770145]
[ 0.05375932]]
[ 1.88272452]
state [[-0.1650449 ]
[ 0.05265654]]
[ 1.8440212]
state [[-0.11374411]
[ 0.0513008 ]]
[ 1.74382043]
state [[-0.06405535]
[ 0.04968876]]
[ 1.56315899]
state [[-0.01625742]
[ 0.04779794]]
[ 1.44210553]
state [[ 0.0294856 ]
[ 0.04574301]]
[ 1.3720727]
state [[ 0.07311046]
[ 0.04362486]]
[ 1.31934845]
state [[ 0.11461456]
[ 0.0415041 ]]
[ 1.31584132]
state [[ 0.15408084]
[ 0.03946628]]
[ 1.34713769]
state [[ 0.19165662]
[ 0.03757578]]
[ 1.37922776]
state [[ 0.22751361]
[ 0.03585699]]
[ 1.39271331]
state [[ 0.26182339]
[ 0.03430977]]
[ 1.40289545]
state [[ 0.29476842]
[ 0.03294503]]
[ 1.41542029]
state [[ 0.32654428]
[ 0.03177588]]
[ 1.41382253]
state [[ 0.35734066]
[ 0.03079638]]
[ 1.39983368]
state [[ 0.38734099]
[ 0.03000034]]
[ 1.38380504]
state [[ 0.41673142]
[ 0.02939044]]
[ 1.36145902]
state [[ 0.44569546]
[ 0.02896405]]
[ 1.34559715]
state [[ 0.47442615]
[ 0.02873068]]
[ 1.33020258]
episode length using learned policy: 154
result after minibatch no. 1500 : mean squared error: 0.291077494621
batch train data [[-0.45674148 0.04850654]
[ 0.07960274 0.05785116]
[-0.63282794 0.05586188]
[-1.01407494 0.0056222 ]
[ 0.59575954 -0.00943674]
[ 0.08577363 -0.05966252]
[-0.54093458 0.00251209]
[-1.11985041 -0.01634606]
[ 0.15217417 0.06627439]
[ 0.30109084 0.05462557]
[-0.34926965 0.04066041]
[ 0.06299382 0.02747415]
[ 0.03052917 0.00383546]
[-0.55201652 0.03899607]
[-0.19382481 0.03184514]
[ 0.5590733 0.02854913]
[-1.03774226 -0.01445686]
[ 0.2558647 0.06901892]
[-0.25933297 0.03455281]
[-0.53082896 0.0067104 ]
[-0.86368971 0.02627921]
[-0.966329 -0.06499921]
[-0.56033131 -0.05465473]
[-1.0252503 -0.06098615]
[ 0.52246296 -0.06373296]
[-0.70852903 0.00279722]
[-0.02906379 -0.05610288]
[-0.33932048 -0.06215446]
[-0.3306278 0.06478642]
[-0.59861868 0.0120857 ]
[ 0.45942425 0.04877555]
[-0.71169213 0.02310571]
[-0.29433861 0.03517625]
[-1.16071462 -0.03573918]
[-0.85031473 0.03448067]
[-0.23882077 -0.06870309]
[-0.50455797 -0.06870677]
[-0.19873122 -0.01613799]
[-0.5936218 -0.03673533]
[-0.45550651 -0.04837423]
[-1.12067334 -0.03228243]
[-0.68179396 0.0484669 ]
[-0.05982913 -0.02082225]
[ 0.48195846 0.01317989]
[ 0.24833191 -0.04386057]
[-1.17691581 -0.04661779]
[-0.56383007 -0.04595739]
[-0.14469759 -0.03191648]
[-0.55750294 -0.05380017]
[-0.66480739 -0.03915136]]
batch train labels [[ 1.89625168]
[ 1. ]
[ 2.06895135]
[ 1.17997436]
[ 1.11007973]
[ 1. ]
[ 3.71232037]
[ 0.62209078]
[ 1. ]
[ 1. ]
[ 1.31216807]
[ 1.64996572]
[ 0.94528861]
[ 1.22226744]
[ 1.40786321]
[ 1.0021732 ]
[ 0.62209078]
[ 1. ]
[ 1.40786321]
[ 3.71232037]
[ 1.55615993]
[ 0.93533027]
[-0.14023047]
[ 1. ]
[ 1. ]
[ 1.23673655]
[ 1. ]
[ 1.00455976]
[ 1.05661102]
[ 3.71232037]
[ 1. ]
[ 1.43051834]
[ 1.40786321]
[ 0.62276976]
[ 2.59707689]
[ 0.99939357]
[ 0.06202713]
[ 0.11962489]
[ 0.46513577]
[-0.18160138]
[ 0.62276976]
[ 0.61158972]
[ 0.42664555]
[ 0.9828954 ]
[ 1. ]
[ 0.67835728]
[-0.14023047]
[ 0.80518055]
[-0.14023047]
[ 0.71581806]]
plotting the mu() policy learned by NN
state [-0.56982524 0. ]
[ 1.74395084]
state [[-0.56873572]
[ 0.00108954]]
[ 1.8267324]
state [[-0.56648195]
[ 0.00225377]]
[ 1.89960682]
state [[-0.56300783]
[ 0.00347411]]
[ 1.96963656]
state [[-0.5582692 ]
[ 0.00473863]]
[ 2.02621937]
state [[-0.55227101]
[ 0.00599819]]
[ 2.0737884]
state [[-0.54505807]
[ 0.00721296]]
[ 2.11290669]
state [[-0.53668427]
[ 0.0083738 ]]
[ 2.14585066]
state [[-0.52721238]
[ 0.00947191]]
[ 2.17149878]
state [[-0.51671338]
[ 0.01049901]]
[ 2.20558953]
state [[-0.50526601]
[ 0.01144738]]
[ 2.23278236]
state [[-0.49295607]
[ 0.01230995]]
[ 2.25000262]
state [[-0.47987562]
[ 0.01308045]]
[ 2.25104785]
state [[-0.46612215]
[ 0.01375347]]
[ 2.24740505]
state [[-0.45179763]
[ 0.01432453]]
[ 2.23600864]
state [[-0.43700746]
[ 0.01479017]]
[ 2.21314502]
state [[-0.42185944]
[ 0.01514802]]
[ 2.17230296]
state [[-0.40646267]
[ 0.01539677]]
[ 2.12860632]
state [[-0.39092645]
[ 0.01553621]]
[ 2.08087134]
state [[-0.37535921]
[ 0.01556724]]
[ 2.03362751]
state [[-0.35986748]
[ 0.01549173]]
[ 1.97889018]
state [[-0.34457606]
[ 0.01529142]]
[ 1.91865182]
state [[-0.32964504]
[ 0.01493102]]
[ 1.8628124]
state [[-0.31522515]
[ 0.01441989]]
[ 1.7893703]
state [[-0.30147889]
[ 0.01374627]]
[ 1.70238888]
state [[-0.28857556]
[ 0.01290334]]
[ 1.60350156]
state [[-0.27668893]
[ 0.01188662]]
[ 1.50279224]
state [[-0.26598659]
[ 0.01070235]]
[ 1.40121281]
state [[-0.25662845]
[ 0.00935814]]
[ 1.31775784]
state [[-0.24874753]
[ 0.00788092]]
[ 1.23903143]
state [[-0.24246319]
[ 0.00628434]]
[ 1.14905322]
state [[-0.23789708]
[ 0.00456611]]
[ 1.04378402]
state [[-0.23517707]
[ 0.00272002]]
[ 0.92093688]
state [[-0.23443928]
[ 0.00073779]]
[ 0.78435165]
state [[-0.2358239 ]
[-0.00138461]]
[ 0.65105188]
state [[-0.23945747]
[-0.00363358]]
[ 0.53205454]
state [[-0.2454412 ]
[-0.00598372]]
[ 0.408795]
state [[-0.25386849]
[-0.00842728]]
[ 0.30575001]
state [[-0.26479936]
[-0.01093085]]
[ 0.20355143]
state [[-0.27827844]
[-0.01347909]]
[ 0.1319042]
state [[-0.29430386]
[-0.01602544]]
[ 0.10888037]
state [[-0.31280768]
[-0.01850381]]
[ 0.08669588]
state [[-0.33370245]
[-0.02089477]]
[ 0.06811915]
state [[-0.35687754]
[-0.02317508]]
[ 0.06247708]
state [[-0.38218907]
[-0.02531152]]
[ 0.03855994]
state [[-0.40949106]
[-0.02730201]]
[ 0.0108965]
state [[-0.43862137]
[-0.0291303 ]]
[-0.00274265]
state [[-0.46938214]
[-0.03076075]]
[-0.01346433]
state [[-0.50154769]
[-0.03216559]]
[-0.00733116]
state [[-0.53487855]
[-0.03333085]]
[ 0.02973598]
state [[-0.56909508]
[-0.03421653]]
[ 0.08908926]
state [[-0.60388237]
[-0.03478728]]
[ 0.15041135]
state [[-0.63892293]
[-0.03504055]]
[ 0.18497579]
state [[-0.6739307 ]
[-0.03500779]]
[ 0.22367698]
state [[-0.70862514]
[-0.03469446]]
[ 0.26619771]
state [[-0.74273586]
[-0.03411074]]
[ 0.29688388]
state [[-0.77602202]
[-0.03328618]]
[ 0.32465613]
state [[-0.80826616]
[-0.03224417]]
[ 0.37416327]
state [[-0.8392514 ]
[-0.03098522]]
[ 0.43883756]
state [[-0.86876869]
[-0.02951728]]
[ 0.49000639]
state [[-0.89664567]
[-0.02787696]]
[ 0.50578588]
state [[-0.92276752]
[-0.02612186]]
[ 0.52331483]
state [[-0.94703823]
[-0.02427072]]
[ 0.51490563]
state [[-0.96940607]
[-0.02236782]]
[ 0.50544465]
state [[-0.98983622]
[-0.02043015]]
[ 0.50881934]
state [[-1.00829446]
[-0.01845826]]
[ 0.51950794]
state [[-1.02475023]
[-0.01645575]]
[ 0.53843987]
state [[-1.03917325]
[-0.01442298]]
[ 0.60582638]
state [[-1.05149114]
[-0.01231788]]
[ 0.70930642]
state [[-1.06159997]
[-0.01010878]]
[ 0.84608299]
state [[-1.06936502]
[-0.00776503]]
[ 0.99160194]
state [[-1.07464397]
[-0.00527895]]
[ 1.14951408]
state [[-1.07728183]
[-0.00263791]]
[ 1.312374]
state [[ -1.07711756e+00]
[ 1.64288329e-04]]
[ 1.47201788]
state [[-1.0739913 ]
[ 0.00312624]]
[ 1.63254893]
state [[-1.06774056]
[ 0.00625072]]
[ 1.78021657]
state [[-1.05821443]
[ 0.00952619]]
[ 1.90066719]
state [[-1.04528892]
[ 0.01292549]]
[ 1.91618741]
state [[-1.02894723]
[ 0.01634164]]
[ 1.87103009]
state [[-1.00923836]
[ 0.01970892]]
[ 1.78965473]
state [[-0.986256 ]
[ 0.02298239]]
[ 1.6742748]
state [[-0.960141 ]
[ 0.02611499]]
[ 1.53665423]
state [[-0.93107414]
[ 0.02906687]]
[ 1.40368235]
state [[-0.89925379]
[ 0.03182038]]
[ 1.29205191]
state [[-0.86488354]
[ 0.03437021]]
[ 1.21363449]
state [[-0.8281644 ]
[ 0.03671915]]
[ 1.12374151]
state [[-0.78934211]
[ 0.03882231]]
[ 1.02930892]
state [[-0.74870193]
[ 0.04064018]]
[ 0.94601458]
state [[-0.70655286]
[ 0.04214904]]
[ 0.92903829]
state [[-0.66317052]
[ 0.04338237]]
[ 0.99666345]
state [[-0.61877501]
[ 0.0443955 ]]
[ 1.07983351]
state [[-0.57359552]
[ 0.04517949]]
[ 1.17630339]
state [[-0.52786613]
[ 0.04572937]]
[ 1.27305841]
state [[-0.4818317 ]
[ 0.04603443]]
[ 1.32580829]
state [[-0.43578389]
[ 0.04604781]]
[ 1.37580955]
state [[-0.39001131]
[ 0.0457726 ]]
[ 1.45826983]
state [[-0.34475574]
[ 0.04525556]]
[ 1.60075748]
state [[-0.30017731]
[ 0.04457843]]
[ 1.73247659]
state [[-0.25641939]
[ 0.04375793]]
[ 1.81896627]
state [[-0.21363857]
[ 0.04278082]]
[ 1.79878116]
state [[-0.17206284]
[ 0.04157573]]
[ 1.66912258]
state [[-0.13199225]
[ 0.04007059]]
[ 1.50331795]
state [[-0.09372489]
[ 0.03826736]]
[ 1.36186373]
state [[-0.0574975 ]
[ 0.03622739]]
[ 1.27615392]
state [[-0.02345685]
[ 0.03404065]]
[ 1.25314677]
state [[ 0.00834313]
[ 0.03179998]]
[ 1.27929306]
state [[ 0.03792319]
[ 0.02958006]]
[ 1.30609918]
state [[ 0.06532551]
[ 0.02740232]]
[ 1.31685901]
state [[ 0.09059255]
[ 0.02526703]]
[ 1.31132305]
state [[ 0.11376267]
[ 0.02317012]]
[ 1.32957602]
state [[ 0.13490656]
[ 0.02114388]]
[ 1.35815954]
state [[ 0.15411057]
[ 0.01920401]]
[ 1.36865497]
state [[ 0.1714457 ]
[ 0.01733513]]
[ 1.35257304]
state [[ 0.18695685]
[ 0.01551115]]
[ 1.29121161]
state [[ 0.20064224]
[ 0.01368538]]
[ 1.23494565]
state [[ 0.21250196]
[ 0.01185971]]
[ 1.18209171]
state [[ 0.22253481]
[ 0.01003285]]
[ 1.12982881]
state [[ 0.23073421]
[ 0.00819941]]
[ 1.08187592]
state [[ 0.2370909 ]
[ 0.00635668]]
[ 1.02246881]
state [[ 0.24157622]
[ 0.00448532]]
[ 0.96051872]
state [[ 0.24415037]
[ 0.00257414]]
[ 0.88929617]
state [[ 0.24475497]
[ 0.00060459]]
[ 0.83202487]
state [[ 0.24333578]
[-0.00141919]]
[ 0.78516638]
state [[ 0.23983884]
[-0.00349695]]
[ 0.76017684]
state [[ 0.23422176]
[-0.00561708]]
[ 0.74682933]
state [[ 0.22644369]
[-0.00777806]]
[ 0.74038106]
state [[ 0.21646103]
[-0.00998266]]
[ 0.7366811]
state [[ 0.20422392]
[-0.01223712]]
[ 0.73239964]
state [[ 0.1896739 ]
[-0.01455001]]
[ 0.7332648]
state [[ 0.17275108]
[-0.01692281]]
[ 0.73279816]
state [[ 0.15338935]
[-0.01936173]]
[ 0.73383826]
state [[ 0.13152151]
[-0.02186784]]
[ 0.73527014]
state [[ 0.10708103]
[-0.02444048]]
[ 0.72413123]
state [[ 0.07999257]
[-0.02708846]]
[ 0.7041688]
state [[ 0.05017992]
[-0.02981265]]
[ 0.68753684]
state [[ 0.01758309]
[-0.03259683]]
[ 0.65205777]
state [[-0.01785821]
[-0.0354413 ]]
[ 0.66696626]
state [[-0.05612895]
[-0.03827075]]
[ 0.6936813]
state [[-0.09717066]
[-0.04104171]]
[ 0.73911709]
state [[-0.14086777]
[-0.04369712]]
[ 0.77155739]
state [[-0.18707338]
[-0.04620562]]
[ 0.7613948]
state [[-0.23563412]
[-0.04856074]]
[ 0.73009008]
state [[-0.28636572]
[-0.05073159]]
[ 0.67634618]
state [[-0.33905378]
[-0.05268805]]
[ 0.60570353]
state [[-0.39345059]
[-0.0543968 ]]
[ 0.53567272]
state [[-0.44926322]
[-0.05581263]]
[ 0.42292958]
state [[-0.5062058 ]
[-0.05694261]]
[ 0.31080198]
state [[-0.563968 ]
[-0.05776219]]
[ 0.23212197]
state [[-0.62219602]
[-0.05822804]]
[ 0.21745999]
state [[-0.68047786]
[-0.05828184]]
[ 0.23750418]
state [[-0.73838854]
[-0.0579107 ]]
[ 0.32655445]
state [[-0.79547095]
[-0.05708241]]
[ 0.38877511]
state [[-0.85134423]
[-0.05587326]]
[ 0.43935224]
state [[-0.90569741]
[-0.05435317]]
[ 0.4465912]
state [[-0.95832586]
[-0.05262847]]
[ 0.4521234]
state [[-1.00909054]
[-0.05076468]]
[ 0.46551782]
state [[-1.05790603]
[-0.04881548]]
[ 0.47297567]
state [[-1.1047498 ]
[-0.04684379]]
[ 0.45539564]
state [[-1.14967537]
[-0.04492557]]
[ 0.43864948]
state [[-1.19277954]
[-0.04310413]]
[ 0.40661094]
state [-1.2 0. ]
[ 1.64182925]
state [[-1.19711637]
[ 0.00288373]]
[ 1.80148149]
state [[-1.19117975]
[ 0.00593659]]
[ 1.9628942]
state [[-1.18200994]
[ 0.00916986]]
[ 2.12496138]
state [[-1.16954172]
[ 0.01246817]]
[ 2.17843175]
state [[-1.15374005]
[ 0.01580166]]
[ 2.13936758]
state [[-1.134565 ]
[ 0.01917505]]
[ 2.06066251]
state [[-1.11197531]
[ 0.02258966]]
[ 1.92038023]
state [[-1.08601236]
[ 0.02596298]]
[ 1.75115359]
state [[-1.05681515]
[ 0.02919721]]
[ 1.57703447]
state [[-1.02454197]
[ 0.0322732 ]]
[ 1.4195056]
state [[-0.98935503]
[ 0.03518694]]
[ 1.27137983]
state [[-0.95143425]
[ 0.03792077]]
[ 1.16838074]
state [[-0.91094756]
[ 0.04048669]]
[ 1.08165264]
state [[-0.86808515]
[ 0.04286239]]
[ 0.97984576]
state [[-0.8230952 ]
[ 0.04498992]]
[ 0.88050354]
state [[-0.77626884]
[ 0.04682639]]
[ 0.81445163]
state [[-0.72790933]
[ 0.04835954]]
[ 0.83076477]
state [[-0.67828083]
[ 0.04962847]]
[ 0.90669209]
state [[-0.62762672]
[ 0.05065409]]
[ 0.99413437]
state [[-0.57621086]
[ 0.05141583]]
[ 1.09201598]
state [[-0.52431005]
[ 0.0519008 ]]
[ 1.15986848]
state [[-0.47224405]
[ 0.052066 ]]
[ 1.21502817]
state [[-0.42034668]
[ 0.05189739]]
[ 1.28772318]
state [[-0.36892363]
[ 0.05142305]]
[ 1.38543141]
state [[-0.31823403]
[ 0.0506896 ]]
[ 1.53175569]
state [[-0.26845729]
[ 0.04977673]]
[ 1.6586591]
state [[-0.21975401]
[ 0.04870328]]
[ 1.70560515]
state [[-0.17232125]
[ 0.04743277]]
[ 1.66419637]
state [[-0.12639758]
[ 0.04592366]]
[ 1.54346871]
state [[-0.08225286]
[ 0.04414472]]
[ 1.36944592]
state [[-0.04016297]
[ 0.04208989]]
[ 1.24868858]
state [[-0.00030627]
[ 0.0398567 ]]
[ 1.19123113]
state [[ 0.03724166]
[ 0.03754793]]
[ 1.2225858]
state [[ 0.07252777]
[ 0.03528611]]
[ 1.25756371]
state [[ 0.10563038]
[ 0.03310262]]
[ 1.29038572]
state [[ 0.13664787]
[ 0.03101748]]
[ 1.31552863]
state [[ 0.16568801]
[ 0.02904015]]
[ 1.33174241]
state [[ 0.19286244]
[ 0.02717443]]
[ 1.37475049]
state [[ 0.21831852]
[ 0.02545609]]
[ 1.40110314]
state [[ 0.24219303]
[ 0.0238745 ]]
[ 1.41060221]
state [[ 0.26460952]
[ 0.02241647]]
[ 1.41570389]
state [[ 0.28568888]
[ 0.02107938]]
[ 1.41213202]
state [[ 0.30554375]
[ 0.01985486]]
[ 1.38834941]
state [[ 0.32426572]
[ 0.01872197]]
[ 1.36686242]
state [[ 0.34194708]
[ 0.01768136]]
[ 1.34674096]
state [[ 0.35867923]
[ 0.01673215]]
[ 1.32226384]
state [[ 0.37454659]
[ 0.01586737]]
[ 1.2954973]
state [[ 0.38962844]
[ 0.01508185]]
[ 1.27105844]
state [[ 0.40400341]
[ 0.01437497]]
[ 1.24891281]
state [[ 0.41774943]
[ 0.01374604]]
[ 1.23212135]
state [[ 0.43094701]
[ 0.01319756]]
[ 1.21812713]
state [[ 0.44367671]
[ 0.01272972]]
[ 1.20605469]
state [[ 0.45601881]
[ 0.01234208]]
[ 1.19604969]
state [[ 0.46805355]
[ 0.01203475]]
[ 1.18806279]
state [[ 0.47986171]
[ 0.01180815]]
[ 1.18195474]
state [[ 0.49152473]
[ 0.01166301]]
[ 1.17788255]
episode length using learned policy: 217
result after minibatch no. 2000 : mean squared error: 0.24948951602
batch train data [[ -7.71736654e-01 6.72156857e-02]
[ -1.00593915e+00 -1.31068799e-02]
[ 4.88448432e-01 3.75707726e-02]
[ -4.25831404e-01 -1.41456190e-02]
[ -1.10536512e+00 -7.44476778e-03]
[ 4.47537029e-01 4.66223335e-02]
[ 2.18272606e-01 4.87706537e-02]
[ -4.32908583e-02 -6.57682859e-02]
[ -1.04808482e+00 4.82670059e-02]
[ 4.33220112e-01 2.37294458e-02]
[ -8.56111603e-01 -3.43328858e-02]
[ 5.72058864e-01 6.25841129e-02]
[ -7.60091162e-01 -5.25770905e-02]
[ -2.33422995e-01 -6.80687467e-02]
[ -1.89857571e-01 6.10444760e-02]
[ -5.25707463e-01 4.32103746e-02]
[ 5.81087933e-03 -1.41265208e-02]
[ -9.58423861e-01 -1.88946076e-02]
[ 2.22984019e-01 5.95676327e-02]
[ -1.09827912e+00 -4.82694470e-02]
[ -1.41847811e-01 -4.54664657e-03]
[ -9.16959095e-01 4.36608068e-03]
[ -1.18134534e+00 5.54944034e-02]
[ 8.36780950e-02 -4.76502466e-02]
[ 1.51273240e-01 3.60347189e-02]
[ 4.35359144e-01 1.70714350e-02]
[ -4.64465618e-01 -2.68534287e-04]
[ 4.04272731e-01 -3.05030825e-03]
[ 2.67493487e-01 -5.49816459e-02]
[ -8.45789751e-01 -4.00902080e-02]
[ -8.69872834e-01 1.02157263e-02]
[ -5.71605777e-01 2.53761714e-02]
[ -3.35401894e-01 -3.00633588e-02]
[ 1.52343212e-01 -9.34001095e-03]
[ -1.01975506e+00 6.11318181e-02]
[ 2.67679530e-01 -6.49331962e-02]
[ 2.10367798e-01 -4.26697246e-02]
[ 1.92858904e-01 -3.72763789e-03]
[ 3.31391744e-01 -2.65948087e-02]
[ 5.83714368e-01 -5.10534269e-02]
[ -3.04519007e-01 4.18722727e-02]
[ 2.52146928e-01 -6.87343078e-02]
[ -4.66654440e-01 -3.34916433e-02]
[ -1.19719669e+00 -1.84012642e-02]
[ 2.29576838e-02 3.83021561e-02]
[ -6.76729665e-01 6.62426309e-02]
[ -1.03875042e+00 -5.21632444e-02]
[ -5.75819141e-01 -3.27541530e-02]
[ 5.94683450e-01 -5.56931417e-02]
[ -1.05995635e+00 -2.38342723e-02]]
batch train labels [[ 1. ]
[ 1.14473635]
[ 1.0021732 ]
[ 0.23244138]
[ 0.8526459 ]
[ 1. ]
[ 1.00271191]
[ 1. ]
[ 1. ]
[ 1.29067028]
[ 0.6045682 ]
[ 1. ]
[ 0.10847247]
[ 0.99939357]
[ 0.99983717]
[ 2.06895135]
[ 0.42664555]
[ 1.0698471 ]
[ 1. ]
[ 0.67835728]
[ 0.58241356]
[ 1.17997436]
[ 1. ]
[ 1. ]
[ 1.36786012]
[ 1.29067028]
[-0.96749015]
[ 0.2292072 ]
[ 1. ]
[ 0.6045682 ]
[ 1.17997436]
[ 1.85801243]
[ 0.04829047]
[ 0.69129624]
[ 1. ]
[ 1. ]
[ 1. ]
[ 0.69129624]
[ 0.97979885]
[ 1. ]
[ 1.31216807]
[ 1. ]
[ 0.04829047]
[ 0.62209078]
[ 1.43590492]
[ 1. ]
[ 0.67835728]
[ 0.46513577]
[ 1. ]
[ 0.62209078]]
plotting the mu() policy learned by NN
state [-0.43392027 0. ]
[ 1.34586442]
state [[ -4.34238911e-01]
[ -3.18639999e-04]]
[ 1.31774664]
state [[-0.43490201]
[-0.00066309]]
[ 1.2890178]
state [[-0.4359335 ]
[-0.00103148]]
[ 1.25997853]
state [[-0.43735495]
[-0.00142144]]
[ 1.23112023]
state [[-0.4391849 ]
[-0.00182995]]
[ 1.20279336]
state [[-0.44143844]
[-0.00225352]]
[ 1.17480516]
state [[-0.44412714]
[-0.00268869]]
[ 1.14798427]
state [[-0.44725826]
[-0.00313112]]
[ 1.1226989]
state [[-0.45083424]
[-0.00357598]]
[ 1.09952474]
state [[-0.4548521 ]
[-0.00401787]]
[ 1.0789578]
state [[-0.45930296]
[-0.00445086]]
[ 1.06151056]
state [[-0.46417156]
[-0.00486858]]
[ 1.04770327]
state [[-0.46943578]
[-0.00526423]]
[ 1.03806424]
state [[-0.47506639]
[-0.0056306 ]]
[ 1.03311706]
state [[-0.48102659]
[-0.00596019]]
[ 1.03337789]
state [[-0.48727182]
[-0.00624524]]
[ 1.0401299]
state [[-0.49374884]
[-0.00647702]]
[ 1.05244088]
state [[-0.50039703]
[-0.00664816]]
[ 1.07100892]
state [[-0.50714803]
[-0.00675102]]
[ 1.09645033]
state [[-0.51392591]
[-0.0067779 ]]
[ 1.12773216]
state [[-0.5206486]
[-0.0067227]]
[ 1.16559541]
state [[-0.52722782]
[-0.00657923]]
[ 1.20999432]
state [[-0.53356981]
[-0.00634202]]
[ 1.25972009]
state [[-0.53957736]
[-0.00600753]]
[ 1.31110716]
state [[-0.54515398]
[-0.00557663]]
[ 1.36705482]
state [[-0.55020201]
[-0.00504802]]
[ 1.43065166]
state [[-0.55462009]
[-0.00441806]]
[ 1.50116515]
state [[-0.55830467]
[-0.00368457]]
[ 1.57242501]
state [[-0.56115699]
[-0.00285232]]
[ 1.64968145]
state [[-0.56307852]
[-0.00192155]]
[ 1.72773707]
state [[-0.56397694]
[-0.00089841]]
[ 1.8051641]
state [[ -5.63768089e-01]
[ 2.08854908e-04]]
[ 1.87773752]
state [[-0.56238097]
[ 0.00138713]]
[ 1.94416606]
state [[-0.55975944]
[ 0.00262151]]
[ 2.00673532]
state [[-0.55586725]
[ 0.00389218]]
[ 2.05842018]
state [[-0.55073345]
[ 0.00513382]]
[ 2.10291243]
state [[-0.54439634]
[ 0.0063371 ]]
[ 2.14579296]
state [[-0.53690338]
[ 0.00749299]]
[ 2.18674684]
state [[-0.52831066]
[ 0.00859274]]
[ 2.21656704]
state [[-0.5186826 ]
[ 0.00962808]]
[ 2.24598169]
state [[-0.50809139]
[ 0.01059121]]
[ 2.26924992]
state [[-0.49661645]
[ 0.01147495]]
[ 2.28323984]
state [[-0.48434365]
[ 0.0122728 ]]
[ 2.29663372]
state [[-0.47136459]
[ 0.01297907]]
[ 2.31113434]
state [[-0.45777568]
[ 0.01358891]]
[ 2.32702446]
state [[-0.44367725]
[ 0.01409844]]
[ 2.33549166]
state [[-0.42917249]
[ 0.01450476]]
[ 2.32124758]
state [[-0.41436648]
[ 0.01480599]]
[ 2.29431295]
state [[-0.39936516]
[ 0.01500134]]
[ 2.25468636]
state [[-0.38427415]
[ 0.01509101]]
[ 2.20634794]
state [[-0.36919793]
[ 0.01507623]]
[ 2.15470648]
state [[-0.35423875]
[ 0.01495919]]
[ 2.1016531]
state [[-0.33949581]
[ 0.01474294]]
[ 2.05279922]
state [[-0.32506451]
[ 0.01443131]]
[ 2.0030942]
state [[-0.31103572]
[ 0.01402879]]
[ 1.95296156]
state [[-0.29754233]
[ 0.0134934 ]]
[ 1.89603531]
state [[-0.28472131]
[ 0.01282101]]
[ 1.83684301]
state [[-0.27270558]
[ 0.01201573]]
[ 1.7566117]
state [[-0.26164225]
[ 0.01106335]]
[ 1.66713548]
state [[-0.25168037]
[ 0.00996189]]
[ 1.57017183]
state [[-0.24296892]
[ 0.00871145]]
[ 1.47962761]
state [[-0.2356426 ]
[ 0.00732632]]
[ 1.38890636]
state [[-0.22982827]
[ 0.00581432]]
[ 1.29353988]
state [[-0.22564934]
[ 0.00417893]]
[ 1.18035614]
state [[-0.22323878]
[ 0.00241056]]
[ 1.05946672]
state [[-0.22272874]
[ 0.00051004]]
[ 0.92026401]
state [[-0.22426081]
[-0.00153207]]
[ 0.78186011]
state [[-0.22796623]
[-0.00370543]]
[ 0.64696229]
state [[-0.23396249]
[-0.00599626]]
[ 0.511729]
state [[-0.24235608]
[-0.00839359]]
[ 0.3823154]
state [[-0.25323516]
[-0.01087909]]
[ 0.26626569]
state [[-0.26666057]
[-0.01342542]]
[ 0.18169817]
state [[-0.28264609]
[-0.01598553]]
[ 0.14187831]
state [[-0.30114356]
[-0.01849747]]
[ 0.10676824]
state [[-0.32208157]
[-0.020938 ]]
[ 0.07860838]
state [[-0.34536195]
[-0.02328038]]
[ 0.04816483]
state [[-0.37086815]
[-0.02550619]]
[ 0.0020213]
state [[-0.39847815]
[-0.02760999]]
[-0.0286148]
state [[-0.42800465]
[-0.02952651]]
[-0.04355665]
state [[-0.45923832]
[-0.03123368]]
[-0.04724225]
state [[-0.4919517 ]
[-0.03271339]]
[-0.04596674]
state [[-0.52590209]
[-0.03395039]]
[-0.01125608]
state [[-0.56083518]
[-0.03493311]]
[ 0.0742421]
state [[-0.59641534]
[-0.03558018]]
[ 0.11135311]
state [[-0.6323424 ]
[-0.03592703]]
[ 0.13945183]
state [[-0.66832876]
[-0.03598639]]
[ 0.16696534]
state [[-0.7040965 ]
[-0.03576773]]
[ 0.20047975]
state [[-0.73937523]
[-0.03527872]]
[ 0.2544913]
state [[-0.77389181]
[-0.03451658]]
[ 0.33461612]
state [[-0.80736804]
[-0.03347626]]
[ 0.43021208]
state [[-0.83953375]
[-0.03216569]]
[ 0.49454263]
state [[-0.87017453]
[-0.0306408 ]]
[ 0.56149119]
state [[-0.89909816]
[-0.02892365]]
[ 0.62072235]
state [[-0.92614383]
[-0.02704564]]
[ 0.65879858]
state [[-0.95119375]
[-0.0250499 ]]
[ 0.69597185]
state [[-0.97415066]
[-0.0229569 ]]
[ 0.72210538]
state [[-0.99494523]
[-0.02079458]]
[ 0.74382055]
state [[-1.01352668]
[-0.01858141]]
[ 0.76213008]
state [[-1.02985871]
[-0.01633203]]
[ 0.82364714]
state [[-1.04387045]
[-0.01401176]]
[ 0.94632339]
state [[-1.05543602]
[-0.01156556]]
[ 1.10595357]
state [[-1.06439638]
[-0.00896037]]
[ 1.26909077]
state [[-1.07059097]
[-0.00619461]]
[ 1.44604278]
state [[-1.07384574]
[-0.00325472]]
[ 1.62488139]
state [[ -1.07398355e+00]
[ -1.37824798e-04]]
[ 1.80682516]
state [[-1.0708226 ]
[ 0.00316093]]
[ 1.99269915]
state [[-1.06417525]
[ 0.00664736]]
[ 2.17016625]
state [[-1.05403113]
[ 0.01014411]]
[ 2.25894213]
state [[-1.04038751]
[ 0.01364359]]
[ 2.23814249]
state [[-1.0232445 ]
[ 0.01714307]]
[ 2.11681938]
state [[-1.00260794]
[ 0.02063661]]
[ 1.96374595]
state [[-0.97852993]
[ 0.02407803]]
[ 1.79625237]
state [[-0.95120853]
[ 0.02732142]]
[ 1.6438241]
state [[-0.92084622]
[ 0.0303623 ]]
[ 1.50016248]
state [[-0.88766122]
[ 0.033185 ]]
[ 1.38274801]
state [[-0.85187435]
[ 0.03578684]]
[ 1.26177752]
state [[-0.81374282]
[ 0.03813156]]
[ 1.15590382]
state [[-0.77354383]
[ 0.04019898]]
[ 1.06418204]
state [[-0.73157686]
[ 0.04196696]]
[ 1.01129699]
state [[-0.68813801]
[ 0.04343884]]
[ 1.00876474]
state [[-0.64350587]
[ 0.04463214]]
[ 1.09075236]
state [[-0.59790295]
[ 0.04560293]]
[ 1.19628286]
state [[-0.55155104]
[ 0.04635189]]
[ 1.30531752]
state [[-0.50468445]
[ 0.0468666 ]]
[ 1.42071187]
state [[-0.4575389 ]
[ 0.04714553]]
[ 1.51695907]
state [[-0.41036862]
[ 0.04717028]]
[ 1.60817504]
state [[-0.36342317]
[ 0.04694546]]
[ 1.70073915]
state [[-0.31693259]
[ 0.04649059]]
[ 1.84702039]
state [[-0.27104756]
[ 0.04588502]]
[ 2.03863287]
state [[-0.22588059]
[ 0.04516698]]
[ 2.19925785]
state [[-0.18166125]
[ 0.04421934]]
[ 2.19000816]
state [[-0.13857974]
[ 0.04308151]]
[ 2.05368996]
state [[-0.09678528]
[ 0.04179446]]
[ 1.85125291]
state [[-0.05653492]
[ 0.04025036]]
[ 1.71628213]
state [[-0.01803241]
[ 0.03850251]]
[ 1.62249994]
state [[ 0.01859626]
[ 0.03662867]]
[ 1.55066609]
state [[ 0.05327948]
[ 0.03468322]]
[ 1.52337694]
state [[ 0.08601795]
[ 0.03273847]]
[ 1.53170943]
state [[ 0.11687091]
[ 0.03085296]]
[ 1.53398371]
state [[ 0.14590995]
[ 0.02903904]]
[ 1.54735124]
state [[ 0.17323205]
[ 0.0273221 ]]
[ 1.57056761]
state [[ 0.19895479]
[ 0.02572274]]
[ 1.61745882]
state [[ 0.22322723]
[ 0.02427244]]
[ 1.63140583]
state [[ 0.24617103]
[ 0.0229438 ]]
[ 1.63404107]
state [[ 0.2679002 ]
[ 0.02172917]]
[ 1.63001931]
state [[ 0.28852427]
[ 0.02062407]]
[ 1.61189783]
state [[ 0.30813971]
[ 0.01961545]]
[ 1.59281862]
state [[ 0.32684225]
[ 0.01870253]]
[ 1.56500387]
state [[ 0.34471831]
[ 0.01787607]]
[ 1.53369093]
state [[ 0.36184993]
[ 0.01713163]]
[ 1.50479925]
state [[ 0.37832031]
[ 0.01647036]]
[ 1.47830105]
state [[ 0.39421356]
[ 0.01589324]]
[ 1.45416117]
state [[ 0.40961474]
[ 0.0154012 ]]
[ 1.43166304]
state [[ 0.42460927]
[ 0.01499454]]
[ 1.41019905]
state [[ 0.43928245]
[ 0.01467319]]
[ 1.3910675]
state [[ 0.45372105]
[ 0.0144386 ]]
[ 1.37426436]
state [[ 0.46801367]
[ 0.01429262]]
[ 1.36040998]
state [[ 0.48225173]
[ 0.01423807]]
[ 1.35016048]
state [[ 0.49653065]
[ 0.01427892]]
[ 1.34211731]
episode length using learned policy: 160
result after minibatch no. 2500 : mean squared error: 0.252490282059
batch train data [[ 0.51152835 0.03864396]
[-0.88334975 0.04798354]
[-0.09685641 0.00448908]
[ 0.50457711 0.06401512]
[-0.99483139 0.03741501]
[ 0.09694891 -0.02706233]
[-0.09729257 -0.06083112]
[-1.0673756 -0.00469454]
[-0.75400291 -0.04806704]
[-0.86843548 -0.02975838]
[-0.40819671 0.0676523 ]
[-0.91071893 0.01536018]
[-0.03994666 0.04611755]
[-0.43685338 0.02995514]
[-0.22438025 0.02209474]
[ 0.40879559 0.02472314]
[-0.87256496 -0.06656697]
[ 0.5543826 -0.06106608]
[ 0.2502327 0.05355559]
[ 0.3069261 -0.03683163]
[ 0.03110618 -0.03529785]
[-0.25221738 0.05269055]
[-0.67828901 -0.04295455]
[-0.80496988 0.03479189]
[-0.30947878 -0.00586192]
[-0.55912344 -0.05404565]
[-0.95347379 0.02243578]
[-1.03458667 0.04357261]
[-0.84251824 0.05031131]
[-1.01350889 -0.06000797]
[-0.9872011 -0.01326568]
[ 0.53158375 0.0187619 ]
[ 0.10563374 0.03614629]
[ 0.46921031 0.0156212 ]
[-1.08891151 0.01399762]
[ 0.53859561 -0.02155576]
[-0.39659795 -0.01057462]
[-0.90004402 -0.03736243]
[ 0.1776976 0.04677869]
[-0.84095086 -0.04568073]
[-0.43330612 -0.01277961]
[-0.76074258 0.06099143]
[-1.08653491 -0.01621972]
[-1.03920326 -0.03018457]
[-0.89033435 0.00950131]
[-0.77659736 0.06250147]
[ 0.549096 0.04806387]
[-0.61005334 0.03894289]
[-0.65378732 0.03489525]
[ 0.37674345 0.05172322]]
batch train labels [[ 1.0021732 ]
[ 0.74289175]
[ 0.94528861]
[ 1. ]
[ 2.59707689]
[ 0.27452517]
[ 1. ]
[ 0.8526459 ]
[ 0.10847247]
[ 0.6045682 ]
[ 1.05661102]
[ 1.55615993]
[ 1.54714103]
[ 1.31216807]
[ 1.57595806]
[ 1.97769483]
[ 0.93533027]
[ 1. ]
[ 1. ]
[ 1. ]
[ 0.27077842]
[ 3.42616224]
[ 0.10847247]
[ 1.04543336]
[-0.96749015]
[-0.14023047]
[ 1.55615993]
[ 1. ]
[ 0.74289175]
[ 0.93533027]
[ 1.14473635]
[ 1.29067028]
[ 1.36786012]
[ 1.29067028]
[ 2.86599393]
[ 1. ]
[-0.96749015]
[ 0.6045682 ]
[ 1.00271191]
[-0.38809582]
[-0.96749015]
[ 1. ]
[ 0.62209078]
[ 0.62276976]
[ 1.17997436]
[ 1. ]
[ 1. ]
[ 1.22226744]
[ 1.22226744]
[ 1. ]]
plotting the mu() policy learned by NN
state [-0.58089582 0. ]
[ 1.81932378]
state [[-0.57964891]
[ 0.00124694]]
[ 1.91375804]
state [[-0.57706982]
[ 0.00257909]]
[ 1.99709916]
state [[-0.57309431]
[ 0.00397551]]
[ 2.07166433]
state [[-0.56774896]
[ 0.00534536]]
[ 2.12506771]
state [[-0.56107342]
[ 0.00667552]]
[ 2.17883253]
state [[-0.55311745]
[ 0.00795599]]
[ 2.23348618]
state [[-0.54394037]
[ 0.00917709]]
[ 2.28523564]
state [[-0.53361082]
[ 0.01032956]]
[ 2.32363653]
state [[-0.52220619]
[ 0.01140464]]
[ 2.34905219]
state [[-0.509812 ]
[ 0.01239419]]
[ 2.35917687]
state [[-0.49652117]
[ 0.01329082]]
[ 2.3596251]
state [[-0.4824332 ]
[ 0.01408796]]
[ 2.36710572]
state [[-0.46765319]
[ 0.01478 ]]
[ 2.37688589]
state [[-0.4522908 ]
[ 0.01536238]]
[ 2.36759853]
state [[-0.43645915]
[ 0.01583164]]
[ 2.33043623]
state [[-0.42027363]
[ 0.01618551]]
[ 2.28644681]
state [[-0.4038507 ]
[ 0.01642292]]
[ 2.2359066]
state [[-0.38730669]
[ 0.01654401]]
[ 2.18049741]
state [[-0.37075663]
[ 0.01655006]]
[ 2.12368155]
state [[-0.35431314]
[ 0.01644349]]
[ 2.06231833]
state [[-0.33808541]
[ 0.01622773]]
[ 2.0020504]
state [[-0.3221783]
[ 0.0159071]]
[ 1.93383133]
state [[-0.30675775]
[ 0.01542054]]
[ 1.85469496]
state [[-0.29199651]
[ 0.01476123]]
[ 1.76500332]
state [[-0.27807087]
[ 0.01392564]]
[ 1.65945709]
state [[-0.26516518]
[ 0.0129057 ]]
[ 1.55963469]
state [[-0.25344968]
[ 0.01171551]]
[ 1.46713078]
state [[-0.24307853]
[ 0.01037115]]
[ 1.37482369]
state [[-0.23419677]
[ 0.00888176]]
[ 1.27885473]
state [[-0.22694407]
[ 0.00725269]]
[ 1.1863178]
state [[-0.22144769]
[ 0.00549638]]
[ 1.07848406]
state [[-0.21784113]
[ 0.00360656]]
[ 0.96150911]
state [[-0.21625793]
[ 0.00158321]]
[ 0.83763039]
state [[-0.21682915]
[-0.00057123]]
[ 0.70331991]
state [[-0.21968652]
[-0.00285738]]
[ 0.56370556]
state [[-0.22495662]
[-0.00527009]]
[ 0.42304748]
state [[-0.23275563]
[-0.00779902]]
[ 0.3092235]
state [[-0.24316032]
[-0.01040469]]
[ 0.20991324]
state [[-0.25621891]
[-0.01305857]]
[ 0.1329256]
state [[-0.27194166]
[-0.01572276]]
[ 0.1035373]
state [[-0.29027405]
[-0.0183324 ]]
[ 0.09051277]
state [[-0.31112644]
[-0.02085238]]
[ 0.06952205]
state [[-0.33439711]
[-0.02327066]]
[ 0.03729346]
state [[-0.3599745]
[-0.0255774]]
[-0.00885954]
state [[-0.38773039]
[-0.02775589]]
[-0.05323765]
state [[-0.41747731]
[-0.02974692]]
[-0.10727188]
state [[-0.44900677]
[-0.03152946]]
[-0.13735884]
state [[-0.48209101]
[-0.03308424]]
[-0.13708818]
state [[-0.51648575]
[-0.03439474]]
[-0.10453612]
state [[-0.55193383]
[-0.03544809]]
[-0.03938988]
state [[-0.58816963]
[-0.03623583]]
[ 0.03343928]
state [[-0.62489074]
[-0.03672113]]
[ 0.12243668]
state [[-0.66174138]
[-0.03685064]]
[ 0.22026286]
state [[-0.69836509]
[-0.03662371]]
[ 0.26814184]
state [[-0.73446918]
[-0.03610406]]
[ 0.28919235]
state [[-0.76980591]
[-0.03533673]]
[ 0.31842417]
state [[-0.80414104]
[-0.03433514]]
[ 0.38093314]
state [[-0.83723092]
[-0.03308988]]
[ 0.4520362]
state [[-0.86884856]
[-0.03161763]]
[ 0.54403162]
state [[-0.89877152]
[-0.02992299]]
[ 0.63801569]
state [[-0.92680025]
[-0.02802874]]
[ 0.6979593]
state [[-0.95279235]
[-0.02599209]]
[ 0.74928439]
state [[-0.97663474]
[-0.02384241]]
[ 0.77056289]
state [[-0.99826241]
[-0.02162765]]
[ 0.78874934]
state [[-1.01762819]
[-0.01936579]]
[ 0.80849564]
state [[-1.03469527]
[-0.01706713]]
[ 0.82790935]
state [[-1.04943621]
[-0.01474097]]
[ 0.86882484]
state [[-1.06180847]
[-0.01237221]]
[ 0.94137478]
state [[-1.0717417 ]
[-0.00993323]]
[ 1.04340994]
state [[-1.07913828]
[-0.0073966 ]]
[ 1.17848027]
state [[-1.08386791]
[-0.00472958]]
[ 1.34843516]
state [[-1.08576417]
[-0.00189626]]
[ 1.53202176]
state [[-1.08464515]
[ 0.00111905]]
[ 1.72683311]
state [[-1.08031499]
[ 0.00433012]]
[ 1.91264415]
state [[-1.07258451]
[ 0.00773043]]
[ 2.07307458]
state [[-1.06136131]
[ 0.01122319]]
[ 2.20023203]
state [[-1.0466404 ]
[ 0.01472093]]
[ 2.21054554]
state [[-1.02841949]
[ 0.01822093]]
[ 2.15501642]
state [[-1.00670254]
[ 0.02171696]]
[ 2.0921979]
state [[-0.98150402]
[ 0.02519854]]
[ 1.9900322]
state [[-0.95286387]
[ 0.02864017]]
[ 1.8717823]
state [[-0.92095137]
[ 0.03191251]]
[ 1.75496709]
state [[-0.88596106]
[ 0.03499031]]
[ 1.65352046]
state [[-0.84810406]
[ 0.03785702]]
[ 1.55516613]
state [[-0.8076247 ]
[ 0.04047935]]
[ 1.45444739]
state [[-0.76480925]
[ 0.04281543]]
[ 1.372684]
state [[-0.71996588]
[ 0.04484339]]
[ 1.32899797]
state [[-0.67340446]
[ 0.04656142]]
[ 1.32404089]
state [[-0.62543291]
[ 0.04797156]]
[ 1.39495015]
state [[-0.57631445]
[ 0.04911844]]
[ 1.50505078]
state [[-0.52629721]
[ 0.05001722]]
[ 1.60469365]
state [[-0.47565508]
[ 0.05064215]]
[ 1.66479373]
state [[-0.42470649]
[ 0.0509486 ]]
[ 1.71213412]
state [[-0.37377661]
[ 0.05092987]]
[ 1.77431357]
state [[-0.32315865]
[ 0.05061797]]
[ 1.86287796]
state [[-0.27309212]
[ 0.05006652]]
[ 1.98583806]
state [[-0.22374663]
[ 0.04934549]]
[ 2.03380108]
state [[-0.17535877]
[ 0.04838786]]
[ 1.99540269]
state [[-0.12813747]
[ 0.0472213 ]]
[ 1.87688804]
state [[-0.08235683]
[ 0.04578064]]
[ 1.69593716]
state [[-0.03830433]
[ 0.0440525 ]]
[ 1.54282701]
state [[ 0.00380749]
[ 0.04211181]]
[ 1.44833934]
state [[ 0.0438678 ]
[ 0.04006032]]
[ 1.3922466]
state [[ 0.08184198]
[ 0.03797418]]
[ 1.37090135]
state [[ 0.11776204]
[ 0.03592006]]
[ 1.38479602]
state [[ 0.1517213 ]
[ 0.03395925]]
[ 1.39870703]
state [[ 0.18383378]
[ 0.03211249]]
[ 1.42985797]
state [[ 0.21424678]
[ 0.030413 ]]
[ 1.47039306]
state [[ 0.24312903]
[ 0.02888225]]
[ 1.51027083]
state [[ 0.2706576 ]
[ 0.02752856]]
[ 1.52456641]
state [[ 0.29699057]
[ 0.02633296]]
[ 1.50884187]
state [[ 0.32226074]
[ 0.02527016]]
[ 1.49034405]
state [[ 0.34660137]
[ 0.02434063]]
[ 1.47151089]
state [[ 0.37014753]
[ 0.02354616]]
[ 1.45483506]
state [[ 0.39303786]
[ 0.02289034]]
[ 1.43823636]
state [[ 0.41541207]
[ 0.02237421]]
[ 1.42159891]
state [[ 0.43741065]
[ 0.02199858]]
[ 1.4024173]
state [[ 0.45917243]
[ 0.02176177]]
[ 1.38223481]
state [[ 0.48083624]
[ 0.02166381]]
[ 1.35659468]
episode length using learned policy: 120
result after minibatch no. 3000 : mean squared error: 0.168998673558
batch train data [[ -5.20550924e-01 3.97777355e-02]
[ -9.76474135e-05 2.64236431e-04]
[ 5.20480606e-01 3.57122871e-02]
[ 5.40856044e-01 2.97238174e-02]
[ -1.13979703e+00 -3.28613290e-02]
[ -8.43054142e-01 -1.35612836e-02]
[ 1.69265351e-01 3.16088143e-02]
[ -6.01981915e-01 -4.35232419e-02]
[ 3.49228601e-01 -3.70572584e-02]
[ -8.53149822e-02 2.64580409e-02]
[ 1.40875915e-01 4.16086563e-02]
[ -7.09800284e-01 -2.75220099e-02]
[ -1.11546071e+00 4.19765158e-02]
[ 1.27236074e-01 -4.44642035e-02]
[ 1.56671445e-02 -3.72749298e-02]
[ 5.60197395e-01 3.22478078e-02]
[ 2.02575314e-01 -2.53324585e-02]
[ -5.04160219e-02 3.45945370e-03]
[ -5.80272320e-01 -2.02959307e-02]
[ -5.03435297e-01 6.49638244e-02]
[ 5.37021647e-01 2.69910082e-02]
[ -1.09846364e+00 1.57628316e-02]
[ -8.30064189e-01 1.58578910e-02]
[ 5.84227676e-01 6.15586969e-02]
[ -9.15037709e-01 -5.62704038e-02]
[ 1.47602416e-01 -6.61400926e-02]
[ 6.35239547e-02 5.01253632e-02]
[ -8.00131034e-01 -1.15742235e-02]
[ -8.55596539e-01 -8.73094988e-03]
[ -4.54147589e-01 -2.82335080e-02]
[ -7.61915895e-01 9.23759925e-04]
[ -6.92165077e-01 4.39577311e-02]
[ -1.14844081e+00 -3.86852576e-02]
[ -5.85098628e-01 3.27598169e-02]
[ -5.67300109e-01 5.40443240e-02]
[ 3.96925979e-01 5.83629523e-03]
[ -9.28160652e-02 -1.78751397e-02]
[ -1.42999307e-01 9.12169394e-03]
[ -1.83352426e-01 -6.80946277e-02]
[ -7.15303444e-01 4.62020350e-02]
[ -1.08495731e-01 -6.41296411e-03]
[ -8.92422774e-02 4.86191439e-02]
[ -2.09182512e-01 -1.59181687e-02]
[ -1.05637457e-01 2.65178711e-03]
[ 1.37877952e-02 2.74503278e-02]
[ -3.76256888e-01 5.84476101e-02]
[ -7.37284669e-01 -3.86796493e-02]
[ -3.82458561e-01 3.30310496e-02]
[ -4.98560612e-01 5.56171288e-02]
[ -9.92002811e-01 4.64755166e-02]]
batch train labels [[ 1.22226744]
[ 0.94528861]
[ 1.0021732 ]
[ 1.0021732 ]
[ 0.62276976]
[ 1.14473635]
[ 1.36786012]
[-0.14023047]
[ 1. ]
[ 1.1842359 ]
[ 1.36786012]
[ 0.92107525]
[ 1.02836613]
[ 1. ]
[ 0.27077842]
[ 1.0021732 ]
[ 0.27452517]
[ 0.94528861]
[-0.81585296]
[ 1.00617072]
[ 1.29067028]
[ 2.887229 ]
[ 1.43051834]
[ 1. ]
[ 0.93533027]
[ 1. ]
[ 1.00271191]
[ 1.09654035]
[ 1.14473635]
[ 0.04829047]
[ 1.23673655]
[ 0.61158972]
[ 0.62276976]
[ 1.22226744]
[ 2.06895135]
[ 1.13224935]
[ 0.42664555]
[ 1.45858352]
[ 0.99939357]
[ 0.61158972]
[ 0.81870769]
[ 1.54714103]
[ 0.11962489]
[ 0.94528861]
[ 1.1842359 ]
[ 1.05661102]
[ 0.71581806]
[ 1.31216807]
[ 2.06895135]
[ 0.74289175]]
plotting the mu() policy learned by NN
state [-0.43470072 0. ]
[ 1.30864716]
state [[ -4.35050935e-01]
[ -3.50212678e-04]]
[ 1.27257991]
state [[-0.43578491]
[-0.00073396]]
[ 1.23500371]
state [[-0.43693489]
[-0.00114997]]
[ 1.19633448]
state [[-0.43853119]
[-0.00159631]]
[ 1.15851092]
state [[-0.4406001 ]
[-0.00206891]]
[ 1.12060463]
state [[-0.44316447]
[-0.00256438]]
[ 1.08309817]
state [[-0.44624317]
[-0.0030787 ]]
[ 1.04652143]
state [[-0.44985032]
[-0.00360715]]
[ 1.0115788]
state [[-0.45399451]
[-0.00414419]]
[ 0.97612923]
state [[-0.45868081]
[-0.0046863 ]]
[ 0.94302434]
state [[-0.4639079 ]
[-0.00522709]]
[ 0.91165739]
state [[-0.46966863]
[-0.00576073]]
[ 0.88381141]
state [[-0.47594824]
[-0.00627963]]
[ 0.8602249]
state [[-0.4827238 ]
[-0.00677557]]
[ 0.84166729]
state [[-0.4899635]
[-0.0072397]]
[ 0.82809836]
state [[-0.49762693]
[-0.00766343]]
[ 0.82081968]
state [[-0.50566411]
[-0.00803721]]
[ 0.82085943]
state [[-0.5140149 ]
[-0.00835079]]
[ 0.82967001]
state [[-0.52260786]
[-0.00859299]]
[ 0.8472752]
state [[-0.53136098]
[-0.00875315]]
[ 0.87383938]
state [[-0.54018205]
[-0.0088211 ]]
[ 0.90992826]
state [[-0.54896891]
[-0.00878685]]
[ 0.95585507]
state [[-0.5576098]
[-0.0086409]]
[ 1.00895739]
state [[-0.56598711]
[-0.0083773 ]]
[ 1.07358718]
state [[-0.57397377]
[-0.00798666]]
[ 1.15007353]
state [[-0.58143401]
[-0.00746021]]
[ 1.23622131]
state [[-0.58822644]
[-0.0067924 ]]
[ 1.33136654]
state [[-0.5942058 ]
[-0.00597935]]
[ 1.4373343]
state [[-0.59922224]
[-0.00501642]]
[ 1.54406714]
state [[-0.60313225]
[-0.00391003]]
[ 1.65149474]
state [[-0.60579991]
[-0.00266768]]
[ 1.76650262]
state [[-0.60709083]
[-0.00129089]]
[ 1.86248899]
state [[ -6.06899559e-01]
[ 1.91258499e-04]]
[ 1.94208169]
state [[-0.60514796]
[ 0.00175161]]
[ 2.02187514]
state [[-0.60179079]
[ 0.00335715]]
[ 2.09457469]
state [[-0.59685254]
[ 0.00493823]]
[ 2.16676569]
state [[-0.59036934]
[ 0.00648322]]
[ 2.23788404]
state [[-0.5823887 ]
[ 0.00798065]]
[ 2.31296253]
state [[-0.57296938]
[ 0.0094193 ]]
[ 2.37663436]
state [[-0.56218117]
[ 0.01078822]]
[ 2.42655993]
state [[-0.5501042 ]
[ 0.01207695]]
[ 2.45381689]
state [[-0.5368287 ]
[ 0.01327553]]
[ 2.46789908]
state [[-0.52245396]
[ 0.01437473]]
[ 2.47434402]
state [[-0.50708783]
[ 0.01536614]]
[ 2.4858408]
state [[-0.49084547]
[ 0.01624236]]
[ 2.5186317]
state [[-0.47384837]
[ 0.0169971 ]]
[ 2.55769348]
state [[-0.45622301]
[ 0.01762536]]
[ 2.59693074]
state [[-0.43809953]
[ 0.01812347]]
[ 2.63117266]
state [[-0.41961029]
[ 0.01848924]]
[ 2.63551855]
state [[-0.40088838]
[ 0.01872191]]
[ 2.59092569]
state [[-0.38206616]
[ 0.01882223]]
[ 2.51249838]
state [[-0.3632738 ]
[ 0.01879235]]
[ 2.42629766]
state [[-0.34463805]
[ 0.01863574]]
[ 2.34575939]
state [[-0.32628095]
[ 0.01835709]]
[ 2.2543292]
state [[-0.30831882]
[ 0.01796213]]
[ 2.17207456]
state [[-0.29086137]
[ 0.01745746]]
[ 2.08298111]
state [[-0.27401105]
[ 0.01685034]]
[ 1.98507071]
state [[-0.25787747]
[ 0.01613357]]
[ 1.88512063]
state [[-0.24264722]
[ 0.01523025]]
[ 1.77114153]
state [[-0.22851218]
[ 0.01413503]]
[ 1.66300535]
state [[-0.21564935]
[ 0.01286284]]
[ 1.57858539]
state [[-0.20420274]
[ 0.0114466 ]]
[ 1.48044682]
state [[-0.19432107]
[ 0.00988167]]
[ 1.38916731]
state [[-0.18613732]
[ 0.00818375]]
[ 1.28972578]
state [[-0.17978409]
[ 0.00635323]]
[ 1.19272733]
state [[-0.17538324]
[ 0.00440086]]
[ 1.08814991]
state [[-0.1730561 ]
[ 0.00232714]]
[ 0.98019272]
state [[ -1.72919348e-01]
[ 1.36750750e-04]]
[ 0.84707701]
state [[-0.17510661]
[-0.00218726]]
[ 0.7101832]
state [[-0.1797466 ]
[-0.00463999]]
[ 0.57051575]
state [[-0.18696132]
[-0.00721472]]
[ 0.46834156]
state [[-0.19682467]
[-0.00986334]]
[ 0.39071834]
state [[-0.20937398]
[-0.01254931]]
[ 0.31047231]
state [[-0.22463566]
[-0.01526167]]
[ 0.28835812]
state [[-0.24256244]
[-0.01792679]]
[ 0.28829807]
state [[-0.26306772]
[-0.02050528]]
[ 0.28496653]
state [[-0.28604907]
[-0.02298134]]
[ 0.25814003]
state [[-0.31140688]
[-0.0253578 ]]
[ 0.22311454]
state [[-0.33902767]
[-0.0276208 ]]
[ 0.1794907]
state [[-0.36878359]
[-0.02975593]]
[ 0.12750363]
state [[-0.40053183]
[-0.03174825]]
[ 0.06627749]
state [[-0.43411598]
[-0.03358414]]
[ 0.01001416]
state [[-0.4693532 ]
[-0.03523722]]
[-0.02759214]
state [[-0.50599545]
[-0.03664227]]
[-0.02314813]
state [[-0.54376966]
[-0.03777423]]
[ 0.02637456]
state [[-0.58236635]
[-0.03859667]]
[ 0.10576288]
state [[-0.62141877]
[-0.03905243]]
[ 0.18078502]
state [[-0.66056722]
[-0.03914848]]
[ 0.24605714]
state [[-0.69947106]
[-0.03890382]]
[ 0.31742364]
state [[-0.73779875]
[-0.03832771]]
[ 0.36136079]
state [[-0.77526689]
[-0.03746815]]
[ 0.38471752]
state [[-0.8116371]
[-0.0363702]]
[ 0.41699272]
state [[-0.84668899]
[-0.03505191]]
[ 0.44173235]
state [[-0.880238 ]
[-0.03354899]]
[ 0.50330102]
state [[-0.91209078]
[-0.03185279]]
[ 0.54640472]
state [[-0.94209975]
[-0.03000894]]
[ 0.56269199]
state [[-0.97016925]
[-0.02806949]]
[ 0.58120555]
state [[-0.99622399]
[-0.02605474]]
[ 0.6075449]
state [[-1.02020037]
[-0.02397637]]
[ 0.62921655]
state [[-1.04205573]
[-0.02185534]]
[ 0.64201403]
state [[-1.06176937]
[-0.01971363]]
[ 0.65549219]
state [[-1.07932985]
[-0.01756052]]
[ 0.66871083]
state [[-1.09473324]
[-0.01540342]]
[ 0.68322361]
state [[-1.10797882]
[-0.01324557]]
[ 0.70507103]
state [[-1.11906075]
[-0.01108195]]
[ 0.77704406]
state [[-1.12792349]
[-0.00886278]]
[ 0.91367543]
state [[-1.13444555]
[-0.00652206]]
[ 1.08935845]
state [[-1.13846338]
[-0.00401785]]
[ 1.27234459]
state [[-1.13980198]
[-0.00133863]]
[ 1.46314979]
state [[-1.13827336]
[ 0.00152867]]
[ 1.66237783]
state [[-1.1336751 ]
[ 0.00459831]]
[ 1.86970806]
state [[-1.12579072]
[ 0.00788435]]
[ 2.06766891]
state [[-1.11447549]
[ 0.01131519]]
[ 2.21565843]
state [[-1.09971106]
[ 0.01476444]]
[ 2.20532417]
state [[-1.08147764]
[ 0.01823348]]
[ 2.15804601]
state [[-1.05975735]
[ 0.02172027]]
[ 2.06319332]
state [[-1.03453887]
[ 0.0252185 ]]
[ 1.92785811]
state [[-1.0058943 ]
[ 0.02864455]]
[ 1.79090047]
state [[-0.97397804]
[ 0.03191629]]
[ 1.65021968]
state [[-0.93897158]
[ 0.03500644]]
[ 1.51060581]
state [[-0.90108514]
[ 0.03788643]]
[ 1.37812996]
state [[-0.86055696]
[ 0.0405282 ]]
[ 1.27347398]
state [[-0.81763703]
[ 0.04291992]]
[ 1.18232012]
state [[-0.77260458]
[ 0.04503245]]
[ 1.14114165]
state [[-0.72573239]
[ 0.04687222]]
[ 1.1472733]
state [[-0.67728812]
[ 0.04844428]]
[ 1.24019098]
state [[-0.62749135]
[ 0.04979673]]
[ 1.34273553]
state [[-0.57658523]
[ 0.05090611]]
[ 1.45488405]
state [[-0.52482849]
[ 0.05175672]]
[ 1.52800393]
state [[-0.47253454]
[ 0.05229395]]
[ 1.60658193]
state [[-0.42001548]
[ 0.05251904]]
[ 1.66818333]
state [[-0.36759269]
[ 0.0524228 ]]
[ 1.70074797]
state [[-0.31559694]
[ 0.05199575]]
[ 1.78377748]
state [[-0.26427814]
[ 0.0513188 ]]
[ 1.94049048]
state [[-0.21377343]
[ 0.05050471]]
[ 2.0782547]
state [[-0.16427198]
[ 0.04950145]]
[ 2.09727001]
state [[-0.11597304]
[ 0.04829894]]
[ 1.93076444]
state [[-0.06909355]
[ 0.04687949]]
[ 1.69170165]
state [[-0.02396884]
[ 0.04512471]]
[ 1.52474213]
state [[ 0.01918707]
[ 0.04315591]]
[ 1.43006253]
state [[ 0.06027719]
[ 0.04109012]]
[ 1.35217357]
state [[ 0.09926024]
[ 0.03898305]]
[ 1.31643128]
state [[ 0.13616975]
[ 0.03690951]]
[ 1.29315615]
state [[ 0.17107813]
[ 0.03490838]]
[ 1.30407405]
state [[ 0.20411268]
[ 0.03303455]]
[ 1.30577326]
state [[ 0.23540723]
[ 0.03129456]]
[ 1.3211267]
state [[ 0.26512086]
[ 0.02971364]]
[ 1.34163809]
state [[ 0.29342607]
[ 0.02830521]]
[ 1.34916925]
state [[ 0.3204881 ]
[ 0.02706204]]
[ 1.34545207]
state [[ 0.34646478]
[ 0.02597669]]
[ 1.33424473]
state [[ 0.37150887]
[ 0.02504408]]
[ 1.32211137]
state [[ 0.39577356]
[ 0.02426469]]
[ 1.30933046]
state [[ 0.4194122 ]
[ 0.02363865]]
[ 1.29562449]
state [[ 0.44257772]
[ 0.02316554]]
[ 1.27948022]
state [[ 0.46542105]
[ 0.02284333]]
[ 1.26119518]
state [[ 0.48809144]
[ 0.0226704 ]]
[ 1.24144125]
episode length using learned policy: 157
In [ ]:
Content source: febert/DeepRL
Similar notebooks: