In [1]:
from mountaincar_nn import *


[2016-07-01 15:09:43,372] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])

In [2]:
# Notebook configuration.
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# the imported project modules are picked up without restarting the kernel.
# NOTE: keep comments on their own lines -- text after a line magic is passed
# to the magic as an argument.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

import numpy as np

In [3]:
from train_mu_offline import *

In [4]:
# Build the offline trainer for the mu() policy with the `use_batchnorm` flag
# switched on, then run the training loop.
# In the recorded run below, training prints the mean squared error every 500
# minibatches, dumps the current batch data/labels, and rolls out the learned
# policy (printing each state and action plus the resulting episode length).
# The name `t1` is kept as-is because later cells may refer to it.
t1 = mu_offline_training(use_batchnorm=True)
t1.start_training()


[2016-07-01 15:10:02,267] Making new env: MountainCarContinuous-v0
moutaincar_dpg.py:375: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  policy_vals = np.zeros((resolution, resolution))
moutaincar_dpg.py:126: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  grid = np.zeros(np.ones(obs_dim)*self.tile_resolution)
action limits (array([ 0.]), array([ 2.]))
N_0 50.0
using environment MountainCarContinuous-v0
tile resolution 10.0
gamma 0.99
[ 1.          0.67835728  0.62276976  0.62209078  0.8526459   2.86599393
  2.887229    1.02836613  1.          1.          0.93533027 -0.38809582
  0.6045682   1.0698471   1.14473635  1.17997436  1.55615993  2.59707689
  0.74289175  1.          0.44594718  0.10847247  0.71581806  0.92107525
  1.09654035  1.23673655  1.43051834  1.04543336  0.61158972  1.
  0.06202713 -0.14023047  0.46513577 -0.81585296  2.23711758  3.71232037
  1.85801243  1.22226744  2.06895135  1.00617072  1.00455976 -0.18160138
  0.04829047  0.23244138 -0.96749015  1.25475142  3.37298761  1.31216807
  1.89625168  1.05661102  0.99939357  0.77621888  0.80518055  0.11962489
  0.58241356  1.45858352  1.57595806  1.40786321  3.42616224  0.99983717
  1.          1.04137001  0.27077842  0.42664555  0.81870769  0.94528861
  1.1842359   1.43590492  1.54714103  1.          1.          1.
  1.09332226  0.27452517  0.69129624  0.9348839   1.64996572  1.36786012
  1.00271191  1.          1.          1.          1.          0.97979885
  0.2292072   1.13224935  1.97769483  1.31925871  1.          1.          1.
  1.          1.          1.          1.11007973  0.9828954   1.29067028
  1.0021732   1.          1.        ]
[20001, 12755, 10774, 10457, 3797, 4972, 3839, 963, 2246, 2066, 2250, 1670, 2426, 689, 622, 2121, 1355, 2652, 1379, 2178, 1830, 1402, 573, 799, 1009, 521, 1572, 610, 768, 842, 1365, 1334, 923, 2560, 830, 627, 1124, 370, 885, 1257, 808, 725, 911, 387, 407, 684, 485, 845, 694, 480, 638, 754, 430, 475, 540, 460, 514, 834, 407, 634, 472, 586, 716, 440, 563, 813, 526, 461, 576, 465, 405, 481, 280, 1150, 744, 464, 529, 320, 604, 423, 1011, 467, 535, 359, 456, 379, 290, 459, 442, 718, 479, 348, 513, 631, 370, 386, 374, 848, 533, 375, 357, 290, 295, 290, 318, 563, 305, 579, 320, 288, 497, 372, 285, 396, 483, 288, 288, 366, 376, 313, 419, 398, 295, 368, 370, 398, 510, 407, 474, 450, 276, 207, 304, 374, 358, 412, 262, 397, 374, 382, 379, 367, 288, 459, 287, 483, 287, 370, 389, 321, 371, 442, 286, 328, 282, 203, 291, 239, 315, 375, 287, 418, 278, 296, 455, 327, 289, 288, 511, 214, 281, 359, 392, 310, 239, 342, 380, 306, 339, 300, 242, 289, 295, 283, 298, 211, 376, 310, 347, 230, 232, 278, 219, 287, 314, 277, 286, 280, 295, 294, 330, 287, 280, 282, 231, 319, 301, 210, 315, 331, 291, 230, 363, 331, 322, 477, 324, 211, 232, 292, 279, 290, 170, 279, 292, 214, 286, 186, 303, 401, 279, 194, 290, 193, 228, 175, 289, 323, 283, 232, 373, 225, 201, 269, 275, 231, 295, 226, 277, 242, 206, 193, 291, 197, 195, 196, 285, 251, 376, 276, 199, 275, 235, 281, 244, 241, 291, 241, 304, 222, 203, 306, 268, 221, 202, 274, 297, 280, 367, 250, 245, 197, 224, 198, 188, 253, 307, 214, 204, 189, 160, 279, 212, 274, 272, 191]
[20001, 12755, 10774, 10457, 3797, 4972, 3839, 963, 2246, 2066, 2250, 1670, 2426, 689, 622, 2121, 1355, 2652, 1379, 2178, 1830, 1402, 573, 799, 1009, 521, 1572, 610, 768, 842, 1365, 1334, 923, 2560, 830, 627, 1124, 370, 885, 1257, 808, 725, 911, 387, 407, 684, 485, 845, 694, 480, 638, 754, 430, 475, 540, 460, 514, 834, 407, 634, 472, 586, 716, 440, 563, 813, 526, 461, 576, 465, 405, 481, 280, 1150, 744, 464, 529, 320, 604, 423, 1011, 467, 535, 359, 456, 379, 290, 459, 442, 718, 479, 348, 513, 631, 370, 386, 374, 848, 533, 375, 357, 290, 295, 290, 318, 563, 305, 579, 320, 288, 497, 372, 285, 396, 483, 288, 288, 366, 376, 313, 419, 398, 295, 368, 370, 398, 510, 407, 474, 450, 276, 207, 304, 374, 358, 412, 262, 397, 374, 382, 379, 367, 288, 459, 287, 483, 287, 370, 389, 321, 371, 442, 286, 328, 282, 203, 291, 239, 315, 375, 287, 418, 278, 296, 455, 327, 289, 288, 511, 214, 281, 359, 392, 310, 239, 342, 380, 306, 339, 300, 242, 289, 295, 283, 298, 211, 376, 310, 347, 230, 232, 278, 219, 287, 314, 277, 286, 280, 295, 294, 330, 287, 280, 282, 231, 319, 301, 210, 315, 331, 291, 230, 363, 331, 322, 477, 324, 211, 232, 292, 279, 290, 170, 279, 292, 214, 286, 186, 303, 401, 279, 194, 290, 193, 228, 175, 289, 323, 283, 232, 373, 225, 201, 269, 275, 231, 295, 226, 277, 242, 206, 193, 291, 197, 195, 196, 285, 251, 376, 276, 199, 275, 235, 281, 244, 241, 291, 241, 304, 222, 203, 306, 268, 221, 202, 274, 297, 280, 367, 250, 245, 197, 224, 198, 188, 253, 307, 214, 204, 189, 160, 279, 212, 274, 272, 191]
result after minibatch no. 500 : mean squared error: 0.497341305017
batch train data [[ -7.97164409e-01  -6.78202638e-02]
 [ -1.00923210e+00  -3.47820658e-02]
 [ -9.59849052e-01  -8.79590286e-03]
 [ -5.89818509e-01   3.74665167e-02]
 [  3.06526705e-01   4.87798747e-02]
 [ -1.93261024e-01   4.88378012e-03]
 [ -1.02765046e+00   1.11913252e-03]
 [ -5.25424805e-01  -2.97713053e-02]
 [  3.58121057e-01   5.33460586e-02]
 [  5.26561841e-02   4.49640692e-02]
 [ -3.75407369e-01  -2.03657189e-03]
 [  3.83927301e-01   1.91929942e-02]
 [  5.53191610e-01   4.85076679e-02]
 [ -3.31405717e-01  -5.42281491e-02]
 [  3.88307910e-01   2.72300885e-02]
 [ -2.25038050e-01   3.01189749e-02]
 [ -1.03577695e+00  -2.56944874e-02]
 [ -1.05990452e+00   2.46364728e-02]
 [ -9.26188034e-01   4.45322792e-02]
 [ -4.70523233e-01  -1.04877740e-03]
 [ -1.92730057e-01  -5.31900906e-02]
 [ -1.02652257e+00  -4.36389696e-03]
 [  5.11426170e-01  -6.91379244e-02]
 [ -2.48000718e-01  -4.82439432e-02]
 [ -1.87445633e-01   2.56464248e-02]
 [ -6.36712104e-01  -2.74465237e-02]
 [ -6.68376881e-01  -4.97914406e-02]
 [ -1.16804106e+00   5.66242696e-02]
 [ -8.41452676e-01   7.34989186e-04]
 [ -9.21266673e-02   1.99852477e-02]
 [ -5.24420820e-01   4.28142203e-02]
 [ -3.43590282e-01   6.01588649e-02]
 [  2.22275393e-01  -3.95295911e-02]
 [ -6.74282452e-01   1.39011661e-02]
 [ -2.23172247e-01  -1.66934710e-02]
 [  1.50741481e-01   6.50529415e-03]
 [ -3.52083187e-01   2.73469175e-02]
 [  5.80841590e-01  -5.25672527e-02]
 [ -1.16866629e+00   6.49670241e-02]
 [  2.56550856e-01   4.97620748e-02]
 [ -1.04466312e-01  -1.42064218e-02]
 [ -1.01346018e+00   5.95261913e-02]
 [ -4.34582674e-01   1.74197090e-02]
 [  3.15240742e-01   5.22158460e-03]
 [ -1.50714122e-01   4.07610836e-02]
 [ -2.44219962e-01  -4.57716078e-02]
 [  4.60677308e-02   6.17687015e-02]
 [ -6.58940797e-01   4.82888047e-02]
 [ -4.80523862e-01   1.02803133e-02]
 [  3.32799371e-01  -6.44141100e-03]]
batch train labels [[ 0.44594718]
 [ 0.6045682 ]
 [ 1.14473635]
 [ 1.22226744]
 [ 1.        ]
 [ 1.45858352]
 [ 2.86599393]
 [ 0.46513577]
 [ 1.        ]
 [ 1.54714103]
 [-0.96749015]
 [ 1.97769483]
 [ 1.        ]
 [-0.18160138]
 [ 1.97769483]
 [ 1.40786321]
 [ 0.62209078]
 [ 2.887229  ]
 [ 0.74289175]
 [-0.96749015]
 [ 0.77621888]
 [ 0.8526459 ]
 [ 1.        ]
 [ 0.77621888]
 [ 1.57595806]
 [-0.81585296]
 [ 0.10847247]
 [ 1.        ]
 [ 1.17997436]
 [ 1.1842359 ]
 [ 2.06895135]
 [ 1.05661102]
 [ 1.09332226]
 [ 1.23673655]
 [ 0.11962489]
 [ 0.9348839 ]
 [ 3.37298761]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.42664555]
 [ 1.        ]
 [ 3.37298761]
 [ 1.13224935]
 [ 1.40786321]
 [ 0.77621888]
 [ 1.        ]
 [ 2.06895135]
 [ 3.71232037]
 [ 0.2292072 ]]
plotting the mu() policy learned by NN
[2016-07-01 15:10:13,466] Observation '[[ -4.89154845e-01]
 [  1.97596994e-04]]' is not contained within observation space 'Box(2,)'.
state [-0.48935245  0.        ]
[ 1.45399284]
state [[ -4.89154845e-01]
 [  1.97596994e-04]]
[ 1.4676019]
state [[ -4.88747507e-01]
 [  4.07329004e-04]]
[ 1.4812485]
state [[-0.48811984]
 [ 0.00062767]]
[ 1.49477792]
state [[-0.48726299]
 [ 0.00085686]]
[ 1.508026]
state [[-0.48617008]
 [ 0.0010929 ]]
[ 1.52082062]
state [[-0.48483649]
 [ 0.0013336 ]]
[ 1.53298128]
state [[-0.48325998]
 [ 0.00157652]]
[ 1.54432225]
state [[-0.48144093]
 [ 0.00181904]]
[ 1.5532347]
state [[-0.47938401]
 [ 0.00205693]]
[ 1.56076872]
state [[-0.47709695]
 [ 0.00228706]]
[ 1.56671083]
state [[-0.47459081]
 [ 0.00250614]]
[ 1.56950462]
state [[-0.47188139]
 [ 0.00270941]]
[ 1.57028186]
state [[-0.46898803]
 [ 0.00289336]]
[ 1.56803358]
state [[-0.4659344 ]
 [ 0.00305364]]
[ 1.56366849]
state [[-0.46274742]
 [ 0.00318698]]
[ 1.55711639]
state [[-0.45945719]
 [ 0.00329025]]
[ 1.54832828]
state [[-0.45609671]
 [ 0.00336047]]
[ 1.53727937]
state [[-0.45270178]
 [ 0.00339494]]
[ 1.5247457]
state [[-0.44930983]
 [ 0.00339196]]
[ 1.51158309]
state [[-0.44595885]
 [ 0.00335098]]
[ 1.49586892]
state [[-0.44268906]
 [ 0.0032698 ]]
[ 1.47761428]
state [[-0.43954253]
 [ 0.00314653]]
[ 1.45684493]
state [[-0.43656293]
 [ 0.00297961]]
[ 1.43361759]
state [[-0.43379506]
 [ 0.00276785]]
[ 1.40808654]
state [[-0.43128455]
 [ 0.00251053]]
[ 1.37978375]
state [[-0.42907777]
 [ 0.00220677]]
[ 1.34626341]
state [[-0.42722419]
 [ 0.00185359]]
[ 1.31039929]
state [[-0.42577296]
 [ 0.00145121]]
[ 1.27324867]
state [[-0.42477173]
 [ 0.00100125]]
[ 1.23588383]
state [[-0.42426497]
 [ 0.00050674]]
[ 1.19430661]
state [[ -4.24297929e-01]
 [ -3.29740578e-05]]
[ 1.15108645]
state [[-0.42491361]
 [-0.00061568]]
[ 1.10717356]
state [[-0.42615148]
 [-0.00123788]]
[ 1.06303549]
state [[-0.42804682]
 [-0.00189533]]
[ 1.01918089]
state [[-0.43062985]
 [-0.00258302]]
[ 0.97615647]
state [[-0.43392497]
 [-0.00329512]]
[ 0.93454462]
state [[-0.43795002]
 [-0.00402505]]
[ 0.89514959]
state [[-0.44271523]
 [-0.00476522]]
[ 0.86080933]
state [[-0.44822031]
 [-0.0055051 ]]
[ 0.83099198]
state [[-0.45445496]
 [-0.00623464]]
[ 0.80637974]
state [[-0.46139809]
 [-0.00694313]]
[ 0.78528816]
state [[-0.46901974]
 [-0.00762164]]
[ 0.7648173]
state [[-0.47728407]
 [-0.00826434]]
[ 0.7502116]
state [[-0.48614445]
 [-0.00886037]]
[ 0.74114555]
state [[-0.49554399]
 [-0.00939955]]
[ 0.73564947]
state [[-0.50541806]
 [-0.00987406]]
[ 0.73742157]
state [[-0.51569098]
 [-0.01027292]]
[ 0.7460441]
state [[-0.52627718]
 [-0.01058618]]
[ 0.76150888]
state [[-0.53708178]
 [-0.01080459]]
[ 0.78411072]
state [[-0.54800117]
 [-0.01091938]]
[ 0.81289232]
state [[-0.55892479]
 [-0.01092363]]
[ 0.84898943]
state [[-0.56973499]
 [-0.0108102 ]]
[ 0.89078784]
state [[-0.58030951]
 [-0.01057449]]
[ 0.93847668]
state [[-0.59052223]
 [-0.01021273]]
[ 0.99479914]
state [[-0.6002416 ]
 [-0.00971937]]
[ 1.06095719]
state [[-0.60933024]
 [-0.00908864]]
[ 1.1350987]
state [[-0.61764789]
 [-0.00831763]]
[ 1.21103966]
state [[-0.62505841]
 [-0.00741054]]
[ 1.28594661]
state [[-0.63143373]
 [-0.00637534]]
[ 1.35643566]
state [[-0.63665789]
 [-0.00522418]]
[ 1.41823077]
state [[-0.64063203]
 [-0.00397417]]
[ 1.48050356]
state [[-0.64326584]
 [-0.00263384]]
[ 1.54354775]
state [[-0.64447778]
 [-0.00121194]]
[ 1.60305083]
state [[ -6.44199848e-01]
 [  2.77962885e-04]]
[ 1.66914368]
state [[-0.64236784]
 [ 0.00183201]]
[ 1.74117267]
state [[-0.63892263]
 [ 0.00344522]]
[ 1.81520116]
state [[-0.63381445]
 [ 0.0051082 ]]
[ 1.88258195]
state [[-0.62701201]
 [ 0.00680242]]
[ 1.94709766]
state [[-0.61849928]
 [ 0.00851273]]
[ 1.99845946]
state [[-0.6082859 ]
 [ 0.01021337]]
[ 2.02001953]
state [[-0.59644419]
 [ 0.01184171]]
[ 2.03985977]
state [[-0.5830605 ]
 [ 0.01338372]]
[ 2.061131]
state [[-0.56823319]
 [ 0.01482732]]
[ 2.08677554]
state [[-0.55207211]
 [ 0.01616108]]
[ 2.11229753]
state [[-0.53469777]
 [ 0.01737437]]
[ 2.12007475]
state [[-0.51624018]
 [ 0.01845759]]
[ 2.12015772]
state [[-0.49683776]
 [ 0.01940241]]
[ 2.11862659]
state [[-0.47663584]
 [ 0.02020192]]
[ 2.11900949]
state [[-0.45578498]
 [ 0.02085086]]
[ 2.12339568]
state [[-0.43443921]
 [ 0.02134576]]
[ 2.13058949]
state [[-0.41275421]
 [ 0.02168501]]
[ 2.1379447]
state [[-0.39088529]
 [ 0.02186891]]
[ 2.11188602]
state [[-0.36898565]
 [ 0.02189965]]
[ 2.07888842]
state [[-0.34720448]
 [ 0.02178118]]
[ 2.02200246]
state [[-0.32568535]
 [ 0.02151911]]
[ 1.96119642]
state [[-0.3046037 ]
 [ 0.02108165]]
[ 1.88524187]
state [[-0.28416365]
 [ 0.02044006]]
[ 1.79912448]
state [[-0.26456976]
 [ 0.01959391]]
[ 1.710392]
state [[-0.24601847]
 [ 0.01855129]]
[ 1.63184869]
state [[-0.22868478]
 [ 0.01733369]]
[ 1.55272114]
state [[-0.21273275]
 [ 0.01595203]]
[ 1.48924494]
state [[-0.19829939]
 [ 0.01443335]]
[ 1.42644298]
state [[-0.18551011]
 [ 0.01278928]]
[ 1.34886146]
state [[-0.17449471]
 [ 0.01101541]]
[ 1.27954304]
state [[-0.16536497]
 [ 0.00912974]]
[ 1.20916033]
state [[-0.15822469]
 [ 0.00714028]]
[ 1.1254828]
state [[-0.15318252]
 [ 0.00504216]]
[ 1.04072833]
state [[-0.15034026]
 [ 0.00284226]]
[ 0.95400673]
state [[-0.149794  ]
 [ 0.00054626]]
[ 0.85403168]
state [[-0.1516455]
 [-0.0018515]]
[ 0.74172854]
state [[-0.15600099]
 [-0.0043555 ]]
[ 0.62716573]
state [[-0.1629605 ]
 [-0.00695951]]
[ 0.49289092]
state [[-0.17263426]
 [-0.00967376]]
[ 0.36739182]
state [[-0.18511277]
 [-0.01247852]]
[ 0.27979702]
state [[-0.2004358 ]
 [-0.01532303]]
[ 0.25043076]
state [[-0.21856989]
 [-0.01813409]]
[ 0.24488688]
state [[-0.23944063]
 [-0.02087074]]
[ 0.27740929]
state [[-0.26291624]
 [-0.02347561]]
[ 0.29812339]
state [[-0.28885555]
 [-0.02593932]]
[ 0.30250067]
state [[-0.31711099]
 [-0.02825544]]
[ 0.29809031]
state [[-0.34751981]
 [-0.03040884]]
[ 0.26403618]
state [[-0.37992465]
 [-0.03240484]]
[ 0.21001799]
state [[-0.41416398]
 [-0.03423932]]
[ 0.14926517]
state [[-0.45006013]
 [-0.03589615]]
[ 0.11170115]
state [[-0.48739165]
 [-0.03733153]]
[ 0.12955034]
state [[-0.52586466]
 [-0.038473  ]]
[ 0.18821153]
state [[-0.56513244]
 [-0.03926779]]
[ 0.26281732]
state [[-0.60482669]
 [-0.03969428]]
[ 0.31775868]
state [[-0.64460003]
 [-0.03977332]]
[ 0.36201727]
state [[-0.68412364]
 [-0.03952359]]
[ 0.40595961]
state [[-0.72308332]
 [-0.03895969]]
[ 0.46113184]
state [[-0.76117349]
 [-0.03809015]]
[ 0.50612468]
state [[-0.79812276]
 [-0.03694928]]
[ 0.54107523]
state [[-0.83369702]
 [-0.03557426]]
[ 0.5794608]
state [[-0.86768734]
 [-0.03399031]]
[ 0.61941868]
state [[-0.89991206]
 [-0.03222473]]
[ 0.6620872]
state [[-0.93021482]
 [-0.03030275]]
[ 0.69497317]
state [[-0.95847499]
 [-0.02826016]]
[ 0.71488827]
state [[-0.98460829]
 [-0.0261333 ]]
[ 0.72271246]
state [[-1.0085628 ]
 [-0.02395453]]
[ 0.72426713]
state [[-1.0303098 ]
 [-0.02174704]]
[ 0.73960996]
state [[-1.04982042]
 [-0.01951064]]
[ 0.76504427]
state [[-1.06706607]
 [-0.01724567]]
[ 0.79581082]
state [[-1.0820204]
 [-0.0149543]]
[ 0.82765263]
state [[-1.09466064]
 [-0.01264028]]
[ 0.85975057]
state [[-1.10496652]
 [-0.01030583]]
[ 0.8948698]
state [[-1.11291492]
 [-0.00794841]]
[ 0.95584047]
state [[-1.11845589]
 [-0.00554099]]
[ 1.04231954]
state [[-1.12151146]
 [-0.00305558]]
[ 1.14815998]
state [[ -1.12198079e+00]
 [ -4.69294842e-04]]
[ 1.26677179]
state [[-1.11974597]
 [ 0.00223482]]
[ 1.40466595]
state [[-1.11466551]
 [ 0.00508051]]
[ 1.54654884]
state [[-1.10658944]
 [ 0.00807603]]
[ 1.69615591]
state [[-1.09535682]
 [ 0.0112326 ]]
[ 1.82103276]
state [[-1.08082926]
 [ 0.01452759]]
[ 1.91005015]
state [[-1.06290436]
 [ 0.01792493]]
[ 1.96082079]
state [[-1.04152143]
 [ 0.02138297]]
[ 1.93498993]
state [[-1.01670384]
 [ 0.0248176 ]]
[ 1.87701082]
state [[-0.98851967]
 [ 0.02818416]]
[ 1.76854169]
state [[-0.95710564]
 [ 0.03141406]]
[ 1.65950978]
state [[-0.9226228 ]
 [ 0.03448281]]
[ 1.55372369]
state [[-0.88525885]
 [ 0.03736397]]
[ 1.45910609]
state [[-0.84522504]
 [ 0.04003381]]
[ 1.36636269]
state [[-0.8027699 ]
 [ 0.04245513]]
[ 1.27794611]
state [[-0.75817937]
 [ 0.04459053]]
[ 1.26264024]
state [[-0.71170849]
 [ 0.04647085]]
[ 1.28282201]
state [[-0.66361767]
 [ 0.0480908 ]]
[ 1.33445764]
state [[-0.61417288]
 [ 0.04944479]]
[ 1.39154661]
state [[-0.56366557]
 [ 0.05050731]]
[ 1.44904864]
state [[-0.51240945]
 [ 0.05125614]]
[ 1.50300848]
state [[-0.46073422]
 [ 0.05167524]]
[ 1.52589905]
state [[-0.40900177]
 [ 0.05173245]]
[ 1.57586896]
state [[-0.35753611]
 [ 0.05146567]]
[ 1.68121815]
state [[-0.30658382]
 [ 0.0509523 ]]
[ 1.78724825]
state [[-0.25635931]
 [ 0.05022451]]
[ 1.86548579]
state [[-0.2070657 ]
 [ 0.04929361]]
[ 1.90897322]
state [[-0.15889607]
 [ 0.04816963]]
[ 1.87446404]
state [[-0.11207328]
 [ 0.04682279]]
[ 1.75029409]
state [[-0.06686021]
 [ 0.04521307]]
[ 1.59914041]
state [[-0.02349788]
 [ 0.04336233]]
[ 1.48269701]
state [[ 0.01785335]
 [ 0.04135124]]
[ 1.43003273]
state [[ 0.05713821]
 [ 0.03928486]]
[ 1.39204776]
state [[ 0.09435175]
 [ 0.03721354]]
[ 1.3833642]
state [[ 0.12954815]
 [ 0.03519639]]
[ 1.39972341]
state [[ 0.16283071]
 [ 0.03328256]]
[ 1.42482388]
state [[ 0.19433048]
 [ 0.03149978]]
[ 1.45190954]
state [[ 0.22419512]
 [ 0.02986464]]
[ 1.47354007]
state [[ 0.25257778]
 [ 0.02838264]]
[ 1.48035944]
state [[ 0.27962479]
 [ 0.02704701]]
[ 1.47312772]
state [[ 0.30547419]
 [ 0.02584939]]
[ 1.46018779]
state [[ 0.33026212]
 [ 0.02478792]]
[ 1.44439828]
state [[ 0.35412437]
 [ 0.02386224]]
[ 1.41477597]
state [[ 0.37718439]
 [ 0.02306001]]
[ 1.38616776]
state [[ 0.39956743]
 [ 0.02238305]]
[ 1.36028767]
state [[ 0.42140183]
 [ 0.02183442]]
[ 1.33634007]
state [[ 0.44281805]
 [ 0.02141623]]
[ 1.31379545]
state [[ 0.46394813]
 [ 0.02113008]]
[ 1.29190838]
state [[ 0.48492512]
 [ 0.020977  ]]
[ 1.27145255]
episode length using learned policy: 189
result after minibatch no. 1000 : mean squared error: 0.198283180594
batch train data [[ 0.19408099  0.01538069]
 [-0.26362413  0.06752013]
 [-0.69951605 -0.05385464]
 [-0.54738126  0.06748359]
 [-0.21778464 -0.04865082]
 [ 0.54335574 -0.00828046]
 [ 0.16748455  0.00280113]
 [-0.06892529 -0.00844221]
 [-0.10153063 -0.05968317]
 [-0.41294398 -0.0579001 ]
 [-0.66986299  0.03172919]
 [-0.0586107   0.05916397]
 [-0.08222334 -0.0135419 ]
 [-0.90261604 -0.06364673]
 [-0.97258324  0.06179676]
 [ 0.19117184  0.03567195]
 [-0.0080051   0.0375999 ]
 [-0.64827353  0.01346781]
 [-0.88261342 -0.00587628]
 [-0.75137338 -0.04300917]
 [-0.8687      0.03963439]
 [-0.22868821  0.00709976]
 [-1.13098404  0.06483648]
 [-0.74003207 -0.05979572]
 [-0.57187377  0.04138672]
 [-0.29682503 -0.00259221]
 [-0.66917254 -0.03213424]
 [ 0.07403914  0.06043526]
 [-0.70054166  0.03214374]
 [-1.0183053   0.02693183]
 [-0.99503129 -0.01189531]
 [-0.90651716 -0.02915869]
 [-0.35015333 -0.01805869]
 [ 0.52430298 -0.00194499]
 [-0.57880194  0.0648794 ]
 [ 0.49329809  0.05705563]
 [ 0.22506749  0.02533672]
 [-0.02199518 -0.0513784 ]
 [ 0.35352841 -0.03391049]
 [-1.05622524 -0.05788412]
 [ 0.05851025 -0.01808364]
 [-0.90433694  0.03211061]
 [ 0.18065786  0.02464932]
 [-0.82108     0.00553891]
 [-0.70791267  0.02753616]
 [ 0.52870181 -0.03133109]
 [-0.20636319 -0.038933  ]
 [-0.84686268  0.03173926]
 [ 0.22102241 -0.00582173]
 [-0.29220623 -0.03172117]]
batch train labels [[ 1.64996572]
 [ 0.99983717]
 [ 0.10847247]
 [ 1.00617072]
 [ 0.77621888]
 [ 1.11007973]
 [ 0.9348839 ]
 [ 0.81870769]
 [ 1.        ]
 [ 1.00455976]
 [ 1.04543336]
 [ 1.        ]
 [ 0.81870769]
 [ 0.93533027]
 [ 1.        ]
 [ 1.36786012]
 [ 1.43590492]
 [ 3.71232037]
 [ 1.14473635]
 [ 0.10847247]
 [ 2.59707689]
 [ 1.45858352]
 [ 1.        ]
 [ 0.44594718]
 [ 1.22226744]
 [ 0.58241356]
 [ 0.71581806]
 [ 1.        ]
 [ 1.04543336]
 [ 1.55615993]
 [ 1.14473635]
 [ 0.6045682 ]
 [ 0.23244138]
 [ 1.11007973]
 [ 1.00617072]
 [ 1.        ]
 [ 1.64996572]
 [ 1.04137001]
 [ 1.        ]
 [ 1.        ]
 [ 0.42664555]
 [ 2.59707689]
 [ 1.64996572]
 [ 1.23673655]
 [ 1.43051834]
 [ 1.        ]
 [ 0.80518055]
 [ 2.59707689]
 [ 0.69129624]
 [ 0.80518055]]
plotting the mu() policy learned by NN
state [-0.400759  0.      ]
[ 0.8454169]
state [[-0.40181419]
 [-0.00105517]]
[ 0.74972928]
state [[-0.40401283]
 [-0.00219864]]
[ 0.65955544]
state [[-0.4074297 ]
 [-0.00341686]]
[ 0.56788349]
state [[-0.41213241]
 [-0.00470272]]
[ 0.47617376]
state [[-0.41817945]
 [-0.00604705]]
[ 0.38765293]
state [[-0.42561638]
 [-0.00743693]]
[ 0.31512311]
state [[-0.43446252]
 [-0.00884613]]
[ 0.24732837]
state [[-0.44472191]
 [-0.01025939]]
[ 0.18647118]
state [[-0.4563809 ]
 [-0.01165899]]
[ 0.1339272]
state [[-0.46940669]
 [-0.01302578]]
[ 0.09544826]
state [[-0.48374167]
 [-0.01433499]]
[ 0.06162274]
state [[-0.49931327]
 [-0.01557158]]
[ 0.05248022]
state [[-0.51601434]
 [-0.01670108]]
[ 0.06062867]
state [[-0.53371167]
 [-0.01769733]]
[ 0.08152326]
state [[-0.55225164]
 [-0.01853998]]
[ 0.11243049]
state [[-0.57146454]
 [-0.01921291]]
[ 0.15268403]
state [[-0.59116703]
 [-0.01970247]]
[ 0.20369369]
state [[-0.61116248]
 [-0.01999548]]
[ 0.26516563]
state [[-0.63124359]
 [-0.02008111]]
[ 0.33433223]
state [[-0.65119702]
 [-0.0199534 ]]
[ 0.40573651]
state [[-0.67081088]
 [-0.01961388]]
[ 0.47530657]
state [[-0.68988091]
 [-0.01907002]]
[ 0.5444206]
state [[-0.70821047]
 [-0.01832957]]
[ 0.61750972]
state [[-0.72560763]
 [-0.01739718]]
[ 0.69116521]
state [[-0.74188966]
 [-0.016282  ]]
[ 0.75286794]
state [[-0.75689614]
 [-0.01500648]]
[ 0.82314968]
state [[-0.77046913]
 [-0.013573  ]]
[ 0.88408768]
state [[-0.78247118]
 [-0.01200206]]
[ 0.93808472]
state [[-0.79278296]
 [-0.0103118 ]]
[ 0.99054658]
state [[-0.80129772]
 [-0.00851476]]
[ 1.05511117]
state [[-0.80790734]
 [-0.0066096 ]]
[ 1.12503552]
state [[-0.81250888]
 [-0.00460155]]
[ 1.19931185]
state [[-0.8150056]
 [-0.0024967]]
[ 1.28484249]
state [[ -8.15299869e-01]
 [ -2.94243684e-04]]
[ 1.36569667]
state [[-0.81330937]
 [ 0.00199048]]
[ 1.44791555]
state [[-0.80896157]
 [ 0.00434781]]
[ 1.53157282]
state [[-0.802194 ]
 [ 0.0067676]]
[ 1.62334299]
state [[-0.79294848]
 [ 0.0092455 ]]
[ 1.71142864]
state [[-0.7811842 ]
 [ 0.01176428]]
[ 1.78872216]
state [[-0.76688594]
 [ 0.01429828]]
[ 1.8229239]
state [[-0.75009781]
 [ 0.01678812]]
[ 1.81457436]
state [[-0.73092413]
 [ 0.0191737 ]]
[ 1.77558672]
state [[-0.70951825]
 [ 0.02140589]]
[ 1.74696541]
state [[-0.68604219]
 [ 0.02347607]]
[ 1.73255277]
state [[-0.66066289]
 [ 0.0253793 ]]
[ 1.70939827]
state [[-0.63357496]
 [ 0.02708795]]
[ 1.71081376]
state [[-0.60496628]
 [ 0.02860871]]
[ 1.73272872]
state [[-0.57502061]
 [ 0.02994565]]
[ 1.7695924]
state [[-0.54392123]
 [ 0.03109938]]
[ 1.8160634]
state [[-0.51185346]
 [ 0.03206776]]
[ 1.86594677]
state [[-0.47900781]
 [ 0.03284564]]
[ 1.90997207]
state [[-0.44558564]
 [ 0.03342218]]
[ 1.9492389]
state [[-0.41179401]
 [ 0.03379164]]
[ 1.97910154]
state [[-0.37784615]
 [ 0.03394784]]
[ 1.99951208]
state [[-0.34395742]
 [ 0.03388871]]
[ 2.04470634]
state [[-0.31035173]
 [ 0.03360568]]
[ 2.10001659]
state [[-0.27723852]
 [ 0.03311321]]
[ 2.10783911]
state [[-0.24480933]
 [ 0.03242919]]
[ 2.00920987]
state [[-0.21323568]
 [ 0.03157365]]
[ 1.88880587]
state [[-0.18277889]
 [ 0.03045678]]
[ 1.75638711]
state [[-0.1536992 ]
 [ 0.02907969]]
[ 1.60263145]
state [[-0.1262558 ]
 [ 0.02744341]]
[ 1.47888064]
state [[-0.10065631]
 [ 0.02559949]]
[ 1.40678418]
state [[-0.07703692]
 [ 0.02361939]]
[ 1.36455178]
state [[-0.05548651]
 [ 0.02155041]]
[ 1.35784924]
state [[-0.0360437 ]
 [ 0.01944281]]
[ 1.33502984]
state [[-0.01875125]
 [ 0.01729244]]
[ 1.29567146]
state [[-0.00365918]
 [ 0.01509207]]
[ 1.24375355]
state [[ 0.00917679]
 [ 0.01283597]]
[ 1.19453537]
state [[ 0.01970825]
 [ 0.01053146]]
[ 1.12120318]
state [[ 0.02786528]
 [ 0.00815703]]
[ 1.02793002]
state [[ 0.03355896]
 [ 0.00569369]]
[ 0.92969298]
state [[ 0.03669501]
 [ 0.00313604]]
[ 0.82264668]
state [[ 0.03716883]
 [ 0.00047382]]
[ 0.71289003]
state [[ 0.03487106]
 [-0.00229776]]
[ 0.61176306]
state [[ 0.02969873]
 [-0.00517233]]
[ 0.53471714]
state [[ 0.02157103]
 [-0.0081277 ]]
[ 0.4931739]
state [[ 0.01044174]
 [-0.01112929]]
[ 0.47299272]
state [[-0.00371333]
 [-0.01415507]]
[ 0.45618373]
state [[-0.02091207]
 [-0.01719873]]
[ 0.43597105]
state [[-0.04116991]
 [-0.02025785]]
[ 0.41978681]
state [[-0.06448893]
 [-0.02331902]]
[ 0.41140181]
state [[-0.0908499 ]
 [-0.02636097]]
[ 0.43993157]
state [[-0.12017865]
 [-0.02932876]]
[ 0.47995979]
state [[-0.15236673]
 [-0.03218807]]
[ 0.51214409]
state [[-0.18728599]
 [-0.03491927]]
[ 0.51103419]
state [[-0.22480989]
 [-0.0375239 ]]
[ 0.49400508]
state [[-0.26479244]
 [-0.03998255]]
[ 0.42859548]
state [[-0.30709821]
 [-0.04230578]]
[ 0.35510159]
state [[-0.35156086]
 [-0.04446265]]
[ 0.29218289]
state [[-0.3979651 ]
 [-0.04640423]]
[ 0.20603457]
state [[-0.4460834]
 [-0.0481183]]
[ 0.13115722]
state [[-0.49564669]
 [-0.04956328]]
[ 0.05594841]
state [[-0.54636341]
 [-0.05071673]]
[ 0.02568084]
state [[-0.59788388]
 [-0.05152044]]
[ 0.04049608]
state [[-0.64981127]
 [-0.05192741]]
[ 0.12528047]
state [[-0.70168924]
 [-0.05187799]]
[ 0.19046049]
state [[-0.75310373]
 [-0.0514145 ]]
[ 0.25736794]
state [[-0.80367237]
 [-0.05056865]]
[ 0.30408436]
state [[-0.85307497]
 [-0.04940259]]
[ 0.35497862]
state [[-0.90103465]
 [-0.0479597 ]]
[ 0.3920157]
state [[-0.94733888]
 [-0.0463042 ]]
[ 0.4557811]
state [[-0.99179864]
 [-0.04445977]]
[ 0.51783186]
state [[-1.03427505]
 [-0.04247638]]
[ 0.56992829]
state [[-1.07468343]
 [-0.04040833]]
[ 0.6156922]
state [[-1.11298454]
 [-0.03830113]]
[ 0.64913231]
state [[-1.14918506]
 [-0.03620053]]
[ 0.66365087]
state [[-1.18333805]
 [-0.03415298]]
[ 0.67717147]
state [-1.2  0. ]
[ 1.7080965]
state [[-1.19705009]
 [ 0.00294999]]
[ 1.87566102]
state [[-1.19097281]
 [ 0.00607725]]
[ 2.04678702]
state [[-1.18162453]
 [ 0.00934828]]
[ 2.21445584]
state [[-1.16897678]
 [ 0.01264773]]
[ 2.33149314]
state [[-1.15299404]
 [ 0.01598274]]
[ 2.36781287]
state [[-1.13363612]
 [ 0.01935787]]
[ 2.30819535]
state [[-1.11086178]
 [ 0.02277428]]
[ 2.20528436]
state [[-1.08463299]
 [ 0.02622882]]
[ 2.06964922]
state [[-1.05491996]
 [ 0.02971307]]
[ 1.91466725]
state [[-1.02179289]
 [ 0.03312707]]
[ 1.73796284]
state [[-0.98543513]
 [ 0.03635778]]
[ 1.5900197]
state [[-0.94603014]
 [ 0.039405  ]]
[ 1.48336959]
state [[-0.90375602]
 [ 0.04227411]]
[ 1.38340771]
state [[-0.8588264]
 [ 0.0449296]]
[ 1.27272677]
state [[-0.81151277]
 [ 0.04731365]]
[ 1.19182146]
state [[-0.7621066 ]
 [ 0.04940616]]
[ 1.17314494]
state [[-0.71088725]
 [ 0.05121934]]
[ 1.22188699]
state [[-0.65811414]
 [ 0.05277314]]
[ 1.30223596]
state [[-0.60405707]
 [ 0.05405708]]
[ 1.3824203]
state [[-0.54901999]
 [ 0.0550371 ]]
[ 1.45421827]
state [[-0.4933382 ]
 [ 0.05568179]]
[ 1.49174392]
state [[-0.43739131]
 [ 0.05594689]]
[ 1.53892195]
state [[-0.38154486]
 [ 0.05584644]]
[ 1.58752894]
state [[-0.32614434]
 [ 0.05540052]]
[ 1.67884076]
state [[-0.27146077]
 [ 0.05468356]]
[ 1.79155445]
state [[-0.21770145]
 [ 0.05375932]]
[ 1.88272452]
state [[-0.1650449 ]
 [ 0.05265654]]
[ 1.8440212]
state [[-0.11374411]
 [ 0.0513008 ]]
[ 1.74382043]
state [[-0.06405535]
 [ 0.04968876]]
[ 1.56315899]
state [[-0.01625742]
 [ 0.04779794]]
[ 1.44210553]
state [[ 0.0294856 ]
 [ 0.04574301]]
[ 1.3720727]
state [[ 0.07311046]
 [ 0.04362486]]
[ 1.31934845]
state [[ 0.11461456]
 [ 0.0415041 ]]
[ 1.31584132]
state [[ 0.15408084]
 [ 0.03946628]]
[ 1.34713769]
state [[ 0.19165662]
 [ 0.03757578]]
[ 1.37922776]
state [[ 0.22751361]
 [ 0.03585699]]
[ 1.39271331]
state [[ 0.26182339]
 [ 0.03430977]]
[ 1.40289545]
state [[ 0.29476842]
 [ 0.03294503]]
[ 1.41542029]
state [[ 0.32654428]
 [ 0.03177588]]
[ 1.41382253]
state [[ 0.35734066]
 [ 0.03079638]]
[ 1.39983368]
state [[ 0.38734099]
 [ 0.03000034]]
[ 1.38380504]
state [[ 0.41673142]
 [ 0.02939044]]
[ 1.36145902]
state [[ 0.44569546]
 [ 0.02896405]]
[ 1.34559715]
state [[ 0.47442615]
 [ 0.02873068]]
[ 1.33020258]
episode length using learned policy: 154
result after minibatch no. 1500 : mean squared error: 0.291077494621
batch train data [[-0.45674148  0.04850654]
 [ 0.07960274  0.05785116]
 [-0.63282794  0.05586188]
 [-1.01407494  0.0056222 ]
 [ 0.59575954 -0.00943674]
 [ 0.08577363 -0.05966252]
 [-0.54093458  0.00251209]
 [-1.11985041 -0.01634606]
 [ 0.15217417  0.06627439]
 [ 0.30109084  0.05462557]
 [-0.34926965  0.04066041]
 [ 0.06299382  0.02747415]
 [ 0.03052917  0.00383546]
 [-0.55201652  0.03899607]
 [-0.19382481  0.03184514]
 [ 0.5590733   0.02854913]
 [-1.03774226 -0.01445686]
 [ 0.2558647   0.06901892]
 [-0.25933297  0.03455281]
 [-0.53082896  0.0067104 ]
 [-0.86368971  0.02627921]
 [-0.966329   -0.06499921]
 [-0.56033131 -0.05465473]
 [-1.0252503  -0.06098615]
 [ 0.52246296 -0.06373296]
 [-0.70852903  0.00279722]
 [-0.02906379 -0.05610288]
 [-0.33932048 -0.06215446]
 [-0.3306278   0.06478642]
 [-0.59861868  0.0120857 ]
 [ 0.45942425  0.04877555]
 [-0.71169213  0.02310571]
 [-0.29433861  0.03517625]
 [-1.16071462 -0.03573918]
 [-0.85031473  0.03448067]
 [-0.23882077 -0.06870309]
 [-0.50455797 -0.06870677]
 [-0.19873122 -0.01613799]
 [-0.5936218  -0.03673533]
 [-0.45550651 -0.04837423]
 [-1.12067334 -0.03228243]
 [-0.68179396  0.0484669 ]
 [-0.05982913 -0.02082225]
 [ 0.48195846  0.01317989]
 [ 0.24833191 -0.04386057]
 [-1.17691581 -0.04661779]
 [-0.56383007 -0.04595739]
 [-0.14469759 -0.03191648]
 [-0.55750294 -0.05380017]
 [-0.66480739 -0.03915136]]
batch train labels [[ 1.89625168]
 [ 1.        ]
 [ 2.06895135]
 [ 1.17997436]
 [ 1.11007973]
 [ 1.        ]
 [ 3.71232037]
 [ 0.62209078]
 [ 1.        ]
 [ 1.        ]
 [ 1.31216807]
 [ 1.64996572]
 [ 0.94528861]
 [ 1.22226744]
 [ 1.40786321]
 [ 1.0021732 ]
 [ 0.62209078]
 [ 1.        ]
 [ 1.40786321]
 [ 3.71232037]
 [ 1.55615993]
 [ 0.93533027]
 [-0.14023047]
 [ 1.        ]
 [ 1.        ]
 [ 1.23673655]
 [ 1.        ]
 [ 1.00455976]
 [ 1.05661102]
 [ 3.71232037]
 [ 1.        ]
 [ 1.43051834]
 [ 1.40786321]
 [ 0.62276976]
 [ 2.59707689]
 [ 0.99939357]
 [ 0.06202713]
 [ 0.11962489]
 [ 0.46513577]
 [-0.18160138]
 [ 0.62276976]
 [ 0.61158972]
 [ 0.42664555]
 [ 0.9828954 ]
 [ 1.        ]
 [ 0.67835728]
 [-0.14023047]
 [ 0.80518055]
 [-0.14023047]
 [ 0.71581806]]
plotting the mu() policy learned by NN
state [-0.56982524  0.        ]
[ 1.74395084]
state [[-0.56873572]
 [ 0.00108954]]
[ 1.8267324]
state [[-0.56648195]
 [ 0.00225377]]
[ 1.89960682]
state [[-0.56300783]
 [ 0.00347411]]
[ 1.96963656]
state [[-0.5582692 ]
 [ 0.00473863]]
[ 2.02621937]
state [[-0.55227101]
 [ 0.00599819]]
[ 2.0737884]
state [[-0.54505807]
 [ 0.00721296]]
[ 2.11290669]
state [[-0.53668427]
 [ 0.0083738 ]]
[ 2.14585066]
state [[-0.52721238]
 [ 0.00947191]]
[ 2.17149878]
state [[-0.51671338]
 [ 0.01049901]]
[ 2.20558953]
state [[-0.50526601]
 [ 0.01144738]]
[ 2.23278236]
state [[-0.49295607]
 [ 0.01230995]]
[ 2.25000262]
state [[-0.47987562]
 [ 0.01308045]]
[ 2.25104785]
state [[-0.46612215]
 [ 0.01375347]]
[ 2.24740505]
state [[-0.45179763]
 [ 0.01432453]]
[ 2.23600864]
state [[-0.43700746]
 [ 0.01479017]]
[ 2.21314502]
state [[-0.42185944]
 [ 0.01514802]]
[ 2.17230296]
state [[-0.40646267]
 [ 0.01539677]]
[ 2.12860632]
state [[-0.39092645]
 [ 0.01553621]]
[ 2.08087134]
state [[-0.37535921]
 [ 0.01556724]]
[ 2.03362751]
state [[-0.35986748]
 [ 0.01549173]]
[ 1.97889018]
state [[-0.34457606]
 [ 0.01529142]]
[ 1.91865182]
state [[-0.32964504]
 [ 0.01493102]]
[ 1.8628124]
state [[-0.31522515]
 [ 0.01441989]]
[ 1.7893703]
state [[-0.30147889]
 [ 0.01374627]]
[ 1.70238888]
state [[-0.28857556]
 [ 0.01290334]]
[ 1.60350156]
state [[-0.27668893]
 [ 0.01188662]]
[ 1.50279224]
state [[-0.26598659]
 [ 0.01070235]]
[ 1.40121281]
state [[-0.25662845]
 [ 0.00935814]]
[ 1.31775784]
state [[-0.24874753]
 [ 0.00788092]]
[ 1.23903143]
state [[-0.24246319]
 [ 0.00628434]]
[ 1.14905322]
state [[-0.23789708]
 [ 0.00456611]]
[ 1.04378402]
state [[-0.23517707]
 [ 0.00272002]]
[ 0.92093688]
state [[-0.23443928]
 [ 0.00073779]]
[ 0.78435165]
state [[-0.2358239 ]
 [-0.00138461]]
[ 0.65105188]
state [[-0.23945747]
 [-0.00363358]]
[ 0.53205454]
state [[-0.2454412 ]
 [-0.00598372]]
[ 0.408795]
state [[-0.25386849]
 [-0.00842728]]
[ 0.30575001]
state [[-0.26479936]
 [-0.01093085]]
[ 0.20355143]
state [[-0.27827844]
 [-0.01347909]]
[ 0.1319042]
state [[-0.29430386]
 [-0.01602544]]
[ 0.10888037]
state [[-0.31280768]
 [-0.01850381]]
[ 0.08669588]
state [[-0.33370245]
 [-0.02089477]]
[ 0.06811915]
state [[-0.35687754]
 [-0.02317508]]
[ 0.06247708]
state [[-0.38218907]
 [-0.02531152]]
[ 0.03855994]
state [[-0.40949106]
 [-0.02730201]]
[ 0.0108965]
state [[-0.43862137]
 [-0.0291303 ]]
[-0.00274265]
state [[-0.46938214]
 [-0.03076075]]
[-0.01346433]
state [[-0.50154769]
 [-0.03216559]]
[-0.00733116]
state [[-0.53487855]
 [-0.03333085]]
[ 0.02973598]
state [[-0.56909508]
 [-0.03421653]]
[ 0.08908926]
state [[-0.60388237]
 [-0.03478728]]
[ 0.15041135]
state [[-0.63892293]
 [-0.03504055]]
[ 0.18497579]
state [[-0.6739307 ]
 [-0.03500779]]
[ 0.22367698]
state [[-0.70862514]
 [-0.03469446]]
[ 0.26619771]
state [[-0.74273586]
 [-0.03411074]]
[ 0.29688388]
state [[-0.77602202]
 [-0.03328618]]
[ 0.32465613]
state [[-0.80826616]
 [-0.03224417]]
[ 0.37416327]
state [[-0.8392514 ]
 [-0.03098522]]
[ 0.43883756]
state [[-0.86876869]
 [-0.02951728]]
[ 0.49000639]
state [[-0.89664567]
 [-0.02787696]]
[ 0.50578588]
state [[-0.92276752]
 [-0.02612186]]
[ 0.52331483]
state [[-0.94703823]
 [-0.02427072]]
[ 0.51490563]
state [[-0.96940607]
 [-0.02236782]]
[ 0.50544465]
state [[-0.98983622]
 [-0.02043015]]
[ 0.50881934]
state [[-1.00829446]
 [-0.01845826]]
[ 0.51950794]
state [[-1.02475023]
 [-0.01645575]]
[ 0.53843987]
state [[-1.03917325]
 [-0.01442298]]
[ 0.60582638]
state [[-1.05149114]
 [-0.01231788]]
[ 0.70930642]
state [[-1.06159997]
 [-0.01010878]]
[ 0.84608299]
state [[-1.06936502]
 [-0.00776503]]
[ 0.99160194]
state [[-1.07464397]
 [-0.00527895]]
[ 1.14951408]
state [[-1.07728183]
 [-0.00263791]]
[ 1.312374]
state [[ -1.07711756e+00]
 [  1.64288329e-04]]
[ 1.47201788]
state [[-1.0739913 ]
 [ 0.00312624]]
[ 1.63254893]
state [[-1.06774056]
 [ 0.00625072]]
[ 1.78021657]
state [[-1.05821443]
 [ 0.00952619]]
[ 1.90066719]
state [[-1.04528892]
 [ 0.01292549]]
[ 1.91618741]
state [[-1.02894723]
 [ 0.01634164]]
[ 1.87103009]
state [[-1.00923836]
 [ 0.01970892]]
[ 1.78965473]
state [[-0.986256  ]
 [ 0.02298239]]
[ 1.6742748]
state [[-0.960141  ]
 [ 0.02611499]]
[ 1.53665423]
state [[-0.93107414]
 [ 0.02906687]]
[ 1.40368235]
state [[-0.89925379]
 [ 0.03182038]]
[ 1.29205191]
state [[-0.86488354]
 [ 0.03437021]]
[ 1.21363449]
state [[-0.8281644 ]
 [ 0.03671915]]
[ 1.12374151]
state [[-0.78934211]
 [ 0.03882231]]
[ 1.02930892]
state [[-0.74870193]
 [ 0.04064018]]
[ 0.94601458]
state [[-0.70655286]
 [ 0.04214904]]
[ 0.92903829]
state [[-0.66317052]
 [ 0.04338237]]
[ 0.99666345]
state [[-0.61877501]
 [ 0.0443955 ]]
[ 1.07983351]
state [[-0.57359552]
 [ 0.04517949]]
[ 1.17630339]
state [[-0.52786613]
 [ 0.04572937]]
[ 1.27305841]
state [[-0.4818317 ]
 [ 0.04603443]]
[ 1.32580829]
state [[-0.43578389]
 [ 0.04604781]]
[ 1.37580955]
state [[-0.39001131]
 [ 0.0457726 ]]
[ 1.45826983]
state [[-0.34475574]
 [ 0.04525556]]
[ 1.60075748]
state [[-0.30017731]
 [ 0.04457843]]
[ 1.73247659]
state [[-0.25641939]
 [ 0.04375793]]
[ 1.81896627]
state [[-0.21363857]
 [ 0.04278082]]
[ 1.79878116]
state [[-0.17206284]
 [ 0.04157573]]
[ 1.66912258]
state [[-0.13199225]
 [ 0.04007059]]
[ 1.50331795]
state [[-0.09372489]
 [ 0.03826736]]
[ 1.36186373]
state [[-0.0574975 ]
 [ 0.03622739]]
[ 1.27615392]
state [[-0.02345685]
 [ 0.03404065]]
[ 1.25314677]
state [[ 0.00834313]
 [ 0.03179998]]
[ 1.27929306]
state [[ 0.03792319]
 [ 0.02958006]]
[ 1.30609918]
state [[ 0.06532551]
 [ 0.02740232]]
[ 1.31685901]
state [[ 0.09059255]
 [ 0.02526703]]
[ 1.31132305]
state [[ 0.11376267]
 [ 0.02317012]]
[ 1.32957602]
state [[ 0.13490656]
 [ 0.02114388]]
[ 1.35815954]
state [[ 0.15411057]
 [ 0.01920401]]
[ 1.36865497]
state [[ 0.1714457 ]
 [ 0.01733513]]
[ 1.35257304]
state [[ 0.18695685]
 [ 0.01551115]]
[ 1.29121161]
state [[ 0.20064224]
 [ 0.01368538]]
[ 1.23494565]
state [[ 0.21250196]
 [ 0.01185971]]
[ 1.18209171]
state [[ 0.22253481]
 [ 0.01003285]]
[ 1.12982881]
state [[ 0.23073421]
 [ 0.00819941]]
[ 1.08187592]
state [[ 0.2370909 ]
 [ 0.00635668]]
[ 1.02246881]
state [[ 0.24157622]
 [ 0.00448532]]
[ 0.96051872]
state [[ 0.24415037]
 [ 0.00257414]]
[ 0.88929617]
state [[ 0.24475497]
 [ 0.00060459]]
[ 0.83202487]
state [[ 0.24333578]
 [-0.00141919]]
[ 0.78516638]
state [[ 0.23983884]
 [-0.00349695]]
[ 0.76017684]
state [[ 0.23422176]
 [-0.00561708]]
[ 0.74682933]
state [[ 0.22644369]
 [-0.00777806]]
[ 0.74038106]
state [[ 0.21646103]
 [-0.00998266]]
[ 0.7366811]
state [[ 0.20422392]
 [-0.01223712]]
[ 0.73239964]
state [[ 0.1896739 ]
 [-0.01455001]]
[ 0.7332648]
state [[ 0.17275108]
 [-0.01692281]]
[ 0.73279816]
state [[ 0.15338935]
 [-0.01936173]]
[ 0.73383826]
state [[ 0.13152151]
 [-0.02186784]]
[ 0.73527014]
state [[ 0.10708103]
 [-0.02444048]]
[ 0.72413123]
state [[ 0.07999257]
 [-0.02708846]]
[ 0.7041688]
state [[ 0.05017992]
 [-0.02981265]]
[ 0.68753684]
state [[ 0.01758309]
 [-0.03259683]]
[ 0.65205777]
state [[-0.01785821]
 [-0.0354413 ]]
[ 0.66696626]
state [[-0.05612895]
 [-0.03827075]]
[ 0.6936813]
state [[-0.09717066]
 [-0.04104171]]
[ 0.73911709]
state [[-0.14086777]
 [-0.04369712]]
[ 0.77155739]
state [[-0.18707338]
 [-0.04620562]]
[ 0.7613948]
state [[-0.23563412]
 [-0.04856074]]
[ 0.73009008]
state [[-0.28636572]
 [-0.05073159]]
[ 0.67634618]
state [[-0.33905378]
 [-0.05268805]]
[ 0.60570353]
state [[-0.39345059]
 [-0.0543968 ]]
[ 0.53567272]
state [[-0.44926322]
 [-0.05581263]]
[ 0.42292958]
state [[-0.5062058 ]
 [-0.05694261]]
[ 0.31080198]
state [[-0.563968  ]
 [-0.05776219]]
[ 0.23212197]
state [[-0.62219602]
 [-0.05822804]]
[ 0.21745999]
state [[-0.68047786]
 [-0.05828184]]
[ 0.23750418]
state [[-0.73838854]
 [-0.0579107 ]]
[ 0.32655445]
state [[-0.79547095]
 [-0.05708241]]
[ 0.38877511]
state [[-0.85134423]
 [-0.05587326]]
[ 0.43935224]
state [[-0.90569741]
 [-0.05435317]]
[ 0.4465912]
state [[-0.95832586]
 [-0.05262847]]
[ 0.4521234]
state [[-1.00909054]
 [-0.05076468]]
[ 0.46551782]
state [[-1.05790603]
 [-0.04881548]]
[ 0.47297567]
state [[-1.1047498 ]
 [-0.04684379]]
[ 0.45539564]
state [[-1.14967537]
 [-0.04492557]]
[ 0.43864948]
state [[-1.19277954]
 [-0.04310413]]
[ 0.40661094]
state [-1.2  0. ]
[ 1.64182925]
state [[-1.19711637]
 [ 0.00288373]]
[ 1.80148149]
state [[-1.19117975]
 [ 0.00593659]]
[ 1.9628942]
state [[-1.18200994]
 [ 0.00916986]]
[ 2.12496138]
state [[-1.16954172]
 [ 0.01246817]]
[ 2.17843175]
state [[-1.15374005]
 [ 0.01580166]]
[ 2.13936758]
state [[-1.134565  ]
 [ 0.01917505]]
[ 2.06066251]
state [[-1.11197531]
 [ 0.02258966]]
[ 1.92038023]
state [[-1.08601236]
 [ 0.02596298]]
[ 1.75115359]
state [[-1.05681515]
 [ 0.02919721]]
[ 1.57703447]
state [[-1.02454197]
 [ 0.0322732 ]]
[ 1.4195056]
state [[-0.98935503]
 [ 0.03518694]]
[ 1.27137983]
state [[-0.95143425]
 [ 0.03792077]]
[ 1.16838074]
state [[-0.91094756]
 [ 0.04048669]]
[ 1.08165264]
state [[-0.86808515]
 [ 0.04286239]]
[ 0.97984576]
state [[-0.8230952 ]
 [ 0.04498992]]
[ 0.88050354]
state [[-0.77626884]
 [ 0.04682639]]
[ 0.81445163]
state [[-0.72790933]
 [ 0.04835954]]
[ 0.83076477]
state [[-0.67828083]
 [ 0.04962847]]
[ 0.90669209]
state [[-0.62762672]
 [ 0.05065409]]
[ 0.99413437]
state [[-0.57621086]
 [ 0.05141583]]
[ 1.09201598]
state [[-0.52431005]
 [ 0.0519008 ]]
[ 1.15986848]
state [[-0.47224405]
 [ 0.052066  ]]
[ 1.21502817]
state [[-0.42034668]
 [ 0.05189739]]
[ 1.28772318]
state [[-0.36892363]
 [ 0.05142305]]
[ 1.38543141]
state [[-0.31823403]
 [ 0.0506896 ]]
[ 1.53175569]
state [[-0.26845729]
 [ 0.04977673]]
[ 1.6586591]
state [[-0.21975401]
 [ 0.04870328]]
[ 1.70560515]
state [[-0.17232125]
 [ 0.04743277]]
[ 1.66419637]
state [[-0.12639758]
 [ 0.04592366]]
[ 1.54346871]
state [[-0.08225286]
 [ 0.04414472]]
[ 1.36944592]
state [[-0.04016297]
 [ 0.04208989]]
[ 1.24868858]
state [[-0.00030627]
 [ 0.0398567 ]]
[ 1.19123113]
state [[ 0.03724166]
 [ 0.03754793]]
[ 1.2225858]
state [[ 0.07252777]
 [ 0.03528611]]
[ 1.25756371]
state [[ 0.10563038]
 [ 0.03310262]]
[ 1.29038572]
state [[ 0.13664787]
 [ 0.03101748]]
[ 1.31552863]
state [[ 0.16568801]
 [ 0.02904015]]
[ 1.33174241]
state [[ 0.19286244]
 [ 0.02717443]]
[ 1.37475049]
state [[ 0.21831852]
 [ 0.02545609]]
[ 1.40110314]
state [[ 0.24219303]
 [ 0.0238745 ]]
[ 1.41060221]
state [[ 0.26460952]
 [ 0.02241647]]
[ 1.41570389]
state [[ 0.28568888]
 [ 0.02107938]]
[ 1.41213202]
state [[ 0.30554375]
 [ 0.01985486]]
[ 1.38834941]
state [[ 0.32426572]
 [ 0.01872197]]
[ 1.36686242]
state [[ 0.34194708]
 [ 0.01768136]]
[ 1.34674096]
state [[ 0.35867923]
 [ 0.01673215]]
[ 1.32226384]
state [[ 0.37454659]
 [ 0.01586737]]
[ 1.2954973]
state [[ 0.38962844]
 [ 0.01508185]]
[ 1.27105844]
state [[ 0.40400341]
 [ 0.01437497]]
[ 1.24891281]
state [[ 0.41774943]
 [ 0.01374604]]
[ 1.23212135]
state [[ 0.43094701]
 [ 0.01319756]]
[ 1.21812713]
state [[ 0.44367671]
 [ 0.01272972]]
[ 1.20605469]
state [[ 0.45601881]
 [ 0.01234208]]
[ 1.19604969]
state [[ 0.46805355]
 [ 0.01203475]]
[ 1.18806279]
state [[ 0.47986171]
 [ 0.01180815]]
[ 1.18195474]
state [[ 0.49152473]
 [ 0.01166301]]
[ 1.17788255]
episode length using learned policy: 217
result after minibatch no. 2000 : mean squared error: 0.24948951602
batch train data [[ -7.71736654e-01   6.72156857e-02]
 [ -1.00593915e+00  -1.31068799e-02]
 [  4.88448432e-01   3.75707726e-02]
 [ -4.25831404e-01  -1.41456190e-02]
 [ -1.10536512e+00  -7.44476778e-03]
 [  4.47537029e-01   4.66223335e-02]
 [  2.18272606e-01   4.87706537e-02]
 [ -4.32908583e-02  -6.57682859e-02]
 [ -1.04808482e+00   4.82670059e-02]
 [  4.33220112e-01   2.37294458e-02]
 [ -8.56111603e-01  -3.43328858e-02]
 [  5.72058864e-01   6.25841129e-02]
 [ -7.60091162e-01  -5.25770905e-02]
 [ -2.33422995e-01  -6.80687467e-02]
 [ -1.89857571e-01   6.10444760e-02]
 [ -5.25707463e-01   4.32103746e-02]
 [  5.81087933e-03  -1.41265208e-02]
 [ -9.58423861e-01  -1.88946076e-02]
 [  2.22984019e-01   5.95676327e-02]
 [ -1.09827912e+00  -4.82694470e-02]
 [ -1.41847811e-01  -4.54664657e-03]
 [ -9.16959095e-01   4.36608068e-03]
 [ -1.18134534e+00   5.54944034e-02]
 [  8.36780950e-02  -4.76502466e-02]
 [  1.51273240e-01   3.60347189e-02]
 [  4.35359144e-01   1.70714350e-02]
 [ -4.64465618e-01  -2.68534287e-04]
 [  4.04272731e-01  -3.05030825e-03]
 [  2.67493487e-01  -5.49816459e-02]
 [ -8.45789751e-01  -4.00902080e-02]
 [ -8.69872834e-01   1.02157263e-02]
 [ -5.71605777e-01   2.53761714e-02]
 [ -3.35401894e-01  -3.00633588e-02]
 [  1.52343212e-01  -9.34001095e-03]
 [ -1.01975506e+00   6.11318181e-02]
 [  2.67679530e-01  -6.49331962e-02]
 [  2.10367798e-01  -4.26697246e-02]
 [  1.92858904e-01  -3.72763789e-03]
 [  3.31391744e-01  -2.65948087e-02]
 [  5.83714368e-01  -5.10534269e-02]
 [ -3.04519007e-01   4.18722727e-02]
 [  2.52146928e-01  -6.87343078e-02]
 [ -4.66654440e-01  -3.34916433e-02]
 [ -1.19719669e+00  -1.84012642e-02]
 [  2.29576838e-02   3.83021561e-02]
 [ -6.76729665e-01   6.62426309e-02]
 [ -1.03875042e+00  -5.21632444e-02]
 [ -5.75819141e-01  -3.27541530e-02]
 [  5.94683450e-01  -5.56931417e-02]
 [ -1.05995635e+00  -2.38342723e-02]]
batch train labels [[ 1.        ]
 [ 1.14473635]
 [ 1.0021732 ]
 [ 0.23244138]
 [ 0.8526459 ]
 [ 1.        ]
 [ 1.00271191]
 [ 1.        ]
 [ 1.        ]
 [ 1.29067028]
 [ 0.6045682 ]
 [ 1.        ]
 [ 0.10847247]
 [ 0.99939357]
 [ 0.99983717]
 [ 2.06895135]
 [ 0.42664555]
 [ 1.0698471 ]
 [ 1.        ]
 [ 0.67835728]
 [ 0.58241356]
 [ 1.17997436]
 [ 1.        ]
 [ 1.        ]
 [ 1.36786012]
 [ 1.29067028]
 [-0.96749015]
 [ 0.2292072 ]
 [ 1.        ]
 [ 0.6045682 ]
 [ 1.17997436]
 [ 1.85801243]
 [ 0.04829047]
 [ 0.69129624]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.69129624]
 [ 0.97979885]
 [ 1.        ]
 [ 1.31216807]
 [ 1.        ]
 [ 0.04829047]
 [ 0.62209078]
 [ 1.43590492]
 [ 1.        ]
 [ 0.67835728]
 [ 0.46513577]
 [ 1.        ]
 [ 0.62209078]]
plotting the mu() policy learned by NN
state [-0.43392027  0.        ]
[ 1.34586442]
state [[ -4.34238911e-01]
 [ -3.18639999e-04]]
[ 1.31774664]
state [[-0.43490201]
 [-0.00066309]]
[ 1.2890178]
state [[-0.4359335 ]
 [-0.00103148]]
[ 1.25997853]
state [[-0.43735495]
 [-0.00142144]]
[ 1.23112023]
state [[-0.4391849 ]
 [-0.00182995]]
[ 1.20279336]
state [[-0.44143844]
 [-0.00225352]]
[ 1.17480516]
state [[-0.44412714]
 [-0.00268869]]
[ 1.14798427]
state [[-0.44725826]
 [-0.00313112]]
[ 1.1226989]
state [[-0.45083424]
 [-0.00357598]]
[ 1.09952474]
state [[-0.4548521 ]
 [-0.00401787]]
[ 1.0789578]
state [[-0.45930296]
 [-0.00445086]]
[ 1.06151056]
state [[-0.46417156]
 [-0.00486858]]
[ 1.04770327]
state [[-0.46943578]
 [-0.00526423]]
[ 1.03806424]
state [[-0.47506639]
 [-0.0056306 ]]
[ 1.03311706]
state [[-0.48102659]
 [-0.00596019]]
[ 1.03337789]
state [[-0.48727182]
 [-0.00624524]]
[ 1.0401299]
state [[-0.49374884]
 [-0.00647702]]
[ 1.05244088]
state [[-0.50039703]
 [-0.00664816]]
[ 1.07100892]
state [[-0.50714803]
 [-0.00675102]]
[ 1.09645033]
state [[-0.51392591]
 [-0.0067779 ]]
[ 1.12773216]
state [[-0.5206486]
 [-0.0067227]]
[ 1.16559541]
state [[-0.52722782]
 [-0.00657923]]
[ 1.20999432]
state [[-0.53356981]
 [-0.00634202]]
[ 1.25972009]
state [[-0.53957736]
 [-0.00600753]]
[ 1.31110716]
state [[-0.54515398]
 [-0.00557663]]
[ 1.36705482]
state [[-0.55020201]
 [-0.00504802]]
[ 1.43065166]
state [[-0.55462009]
 [-0.00441806]]
[ 1.50116515]
state [[-0.55830467]
 [-0.00368457]]
[ 1.57242501]
state [[-0.56115699]
 [-0.00285232]]
[ 1.64968145]
state [[-0.56307852]
 [-0.00192155]]
[ 1.72773707]
state [[-0.56397694]
 [-0.00089841]]
[ 1.8051641]
state [[ -5.63768089e-01]
 [  2.08854908e-04]]
[ 1.87773752]
state [[-0.56238097]
 [ 0.00138713]]
[ 1.94416606]
state [[-0.55975944]
 [ 0.00262151]]
[ 2.00673532]
state [[-0.55586725]
 [ 0.00389218]]
[ 2.05842018]
state [[-0.55073345]
 [ 0.00513382]]
[ 2.10291243]
state [[-0.54439634]
 [ 0.0063371 ]]
[ 2.14579296]
state [[-0.53690338]
 [ 0.00749299]]
[ 2.18674684]
state [[-0.52831066]
 [ 0.00859274]]
[ 2.21656704]
state [[-0.5186826 ]
 [ 0.00962808]]
[ 2.24598169]
state [[-0.50809139]
 [ 0.01059121]]
[ 2.26924992]
state [[-0.49661645]
 [ 0.01147495]]
[ 2.28323984]
state [[-0.48434365]
 [ 0.0122728 ]]
[ 2.29663372]
state [[-0.47136459]
 [ 0.01297907]]
[ 2.31113434]
state [[-0.45777568]
 [ 0.01358891]]
[ 2.32702446]
state [[-0.44367725]
 [ 0.01409844]]
[ 2.33549166]
state [[-0.42917249]
 [ 0.01450476]]
[ 2.32124758]
state [[-0.41436648]
 [ 0.01480599]]
[ 2.29431295]
state [[-0.39936516]
 [ 0.01500134]]
[ 2.25468636]
state [[-0.38427415]
 [ 0.01509101]]
[ 2.20634794]
state [[-0.36919793]
 [ 0.01507623]]
[ 2.15470648]
state [[-0.35423875]
 [ 0.01495919]]
[ 2.1016531]
state [[-0.33949581]
 [ 0.01474294]]
[ 2.05279922]
state [[-0.32506451]
 [ 0.01443131]]
[ 2.0030942]
state [[-0.31103572]
 [ 0.01402879]]
[ 1.95296156]
state [[-0.29754233]
 [ 0.0134934 ]]
[ 1.89603531]
state [[-0.28472131]
 [ 0.01282101]]
[ 1.83684301]
state [[-0.27270558]
 [ 0.01201573]]
[ 1.7566117]
state [[-0.26164225]
 [ 0.01106335]]
[ 1.66713548]
state [[-0.25168037]
 [ 0.00996189]]
[ 1.57017183]
state [[-0.24296892]
 [ 0.00871145]]
[ 1.47962761]
state [[-0.2356426 ]
 [ 0.00732632]]
[ 1.38890636]
state [[-0.22982827]
 [ 0.00581432]]
[ 1.29353988]
state [[-0.22564934]
 [ 0.00417893]]
[ 1.18035614]
state [[-0.22323878]
 [ 0.00241056]]
[ 1.05946672]
state [[-0.22272874]
 [ 0.00051004]]
[ 0.92026401]
state [[-0.22426081]
 [-0.00153207]]
[ 0.78186011]
state [[-0.22796623]
 [-0.00370543]]
[ 0.64696229]
state [[-0.23396249]
 [-0.00599626]]
[ 0.511729]
state [[-0.24235608]
 [-0.00839359]]
[ 0.3823154]
state [[-0.25323516]
 [-0.01087909]]
[ 0.26626569]
state [[-0.26666057]
 [-0.01342542]]
[ 0.18169817]
state [[-0.28264609]
 [-0.01598553]]
[ 0.14187831]
state [[-0.30114356]
 [-0.01849747]]
[ 0.10676824]
state [[-0.32208157]
 [-0.020938  ]]
[ 0.07860838]
state [[-0.34536195]
 [-0.02328038]]
[ 0.04816483]
state [[-0.37086815]
 [-0.02550619]]
[ 0.0020213]
state [[-0.39847815]
 [-0.02760999]]
[-0.0286148]
state [[-0.42800465]
 [-0.02952651]]
[-0.04355665]
state [[-0.45923832]
 [-0.03123368]]
[-0.04724225]
state [[-0.4919517 ]
 [-0.03271339]]
[-0.04596674]
state [[-0.52590209]
 [-0.03395039]]
[-0.01125608]
state [[-0.56083518]
 [-0.03493311]]
[ 0.0742421]
state [[-0.59641534]
 [-0.03558018]]
[ 0.11135311]
state [[-0.6323424 ]
 [-0.03592703]]
[ 0.13945183]
state [[-0.66832876]
 [-0.03598639]]
[ 0.16696534]
state [[-0.7040965 ]
 [-0.03576773]]
[ 0.20047975]
state [[-0.73937523]
 [-0.03527872]]
[ 0.2544913]
state [[-0.77389181]
 [-0.03451658]]
[ 0.33461612]
state [[-0.80736804]
 [-0.03347626]]
[ 0.43021208]
state [[-0.83953375]
 [-0.03216569]]
[ 0.49454263]
state [[-0.87017453]
 [-0.0306408 ]]
[ 0.56149119]
state [[-0.89909816]
 [-0.02892365]]
[ 0.62072235]
state [[-0.92614383]
 [-0.02704564]]
[ 0.65879858]
state [[-0.95119375]
 [-0.0250499 ]]
[ 0.69597185]
state [[-0.97415066]
 [-0.0229569 ]]
[ 0.72210538]
state [[-0.99494523]
 [-0.02079458]]
[ 0.74382055]
state [[-1.01352668]
 [-0.01858141]]
[ 0.76213008]
state [[-1.02985871]
 [-0.01633203]]
[ 0.82364714]
state [[-1.04387045]
 [-0.01401176]]
[ 0.94632339]
state [[-1.05543602]
 [-0.01156556]]
[ 1.10595357]
state [[-1.06439638]
 [-0.00896037]]
[ 1.26909077]
state [[-1.07059097]
 [-0.00619461]]
[ 1.44604278]
state [[-1.07384574]
 [-0.00325472]]
[ 1.62488139]
state [[ -1.07398355e+00]
 [ -1.37824798e-04]]
[ 1.80682516]
state [[-1.0708226 ]
 [ 0.00316093]]
[ 1.99269915]
state [[-1.06417525]
 [ 0.00664736]]
[ 2.17016625]
state [[-1.05403113]
 [ 0.01014411]]
[ 2.25894213]
state [[-1.04038751]
 [ 0.01364359]]
[ 2.23814249]
state [[-1.0232445 ]
 [ 0.01714307]]
[ 2.11681938]
state [[-1.00260794]
 [ 0.02063661]]
[ 1.96374595]
state [[-0.97852993]
 [ 0.02407803]]
[ 1.79625237]
state [[-0.95120853]
 [ 0.02732142]]
[ 1.6438241]
state [[-0.92084622]
 [ 0.0303623 ]]
[ 1.50016248]
state [[-0.88766122]
 [ 0.033185  ]]
[ 1.38274801]
state [[-0.85187435]
 [ 0.03578684]]
[ 1.26177752]
state [[-0.81374282]
 [ 0.03813156]]
[ 1.15590382]
state [[-0.77354383]
 [ 0.04019898]]
[ 1.06418204]
state [[-0.73157686]
 [ 0.04196696]]
[ 1.01129699]
state [[-0.68813801]
 [ 0.04343884]]
[ 1.00876474]
state [[-0.64350587]
 [ 0.04463214]]
[ 1.09075236]
state [[-0.59790295]
 [ 0.04560293]]
[ 1.19628286]
state [[-0.55155104]
 [ 0.04635189]]
[ 1.30531752]
state [[-0.50468445]
 [ 0.0468666 ]]
[ 1.42071187]
state [[-0.4575389 ]
 [ 0.04714553]]
[ 1.51695907]
state [[-0.41036862]
 [ 0.04717028]]
[ 1.60817504]
state [[-0.36342317]
 [ 0.04694546]]
[ 1.70073915]
state [[-0.31693259]
 [ 0.04649059]]
[ 1.84702039]
state [[-0.27104756]
 [ 0.04588502]]
[ 2.03863287]
state [[-0.22588059]
 [ 0.04516698]]
[ 2.19925785]
state [[-0.18166125]
 [ 0.04421934]]
[ 2.19000816]
state [[-0.13857974]
 [ 0.04308151]]
[ 2.05368996]
state [[-0.09678528]
 [ 0.04179446]]
[ 1.85125291]
state [[-0.05653492]
 [ 0.04025036]]
[ 1.71628213]
state [[-0.01803241]
 [ 0.03850251]]
[ 1.62249994]
state [[ 0.01859626]
 [ 0.03662867]]
[ 1.55066609]
state [[ 0.05327948]
 [ 0.03468322]]
[ 1.52337694]
state [[ 0.08601795]
 [ 0.03273847]]
[ 1.53170943]
state [[ 0.11687091]
 [ 0.03085296]]
[ 1.53398371]
state [[ 0.14590995]
 [ 0.02903904]]
[ 1.54735124]
state [[ 0.17323205]
 [ 0.0273221 ]]
[ 1.57056761]
state [[ 0.19895479]
 [ 0.02572274]]
[ 1.61745882]
state [[ 0.22322723]
 [ 0.02427244]]
[ 1.63140583]
state [[ 0.24617103]
 [ 0.0229438 ]]
[ 1.63404107]
state [[ 0.2679002 ]
 [ 0.02172917]]
[ 1.63001931]
state [[ 0.28852427]
 [ 0.02062407]]
[ 1.61189783]
state [[ 0.30813971]
 [ 0.01961545]]
[ 1.59281862]
state [[ 0.32684225]
 [ 0.01870253]]
[ 1.56500387]
state [[ 0.34471831]
 [ 0.01787607]]
[ 1.53369093]
state [[ 0.36184993]
 [ 0.01713163]]
[ 1.50479925]
state [[ 0.37832031]
 [ 0.01647036]]
[ 1.47830105]
state [[ 0.39421356]
 [ 0.01589324]]
[ 1.45416117]
state [[ 0.40961474]
 [ 0.0154012 ]]
[ 1.43166304]
state [[ 0.42460927]
 [ 0.01499454]]
[ 1.41019905]
state [[ 0.43928245]
 [ 0.01467319]]
[ 1.3910675]
state [[ 0.45372105]
 [ 0.0144386 ]]
[ 1.37426436]
state [[ 0.46801367]
 [ 0.01429262]]
[ 1.36040998]
state [[ 0.48225173]
 [ 0.01423807]]
[ 1.35016048]
state [[ 0.49653065]
 [ 0.01427892]]
[ 1.34211731]
episode length using learned policy: 160
result after minibatch no. 2500 : mean squared error: 0.252490282059
batch train data [[ 0.51152835  0.03864396]
 [-0.88334975  0.04798354]
 [-0.09685641  0.00448908]
 [ 0.50457711  0.06401512]
 [-0.99483139  0.03741501]
 [ 0.09694891 -0.02706233]
 [-0.09729257 -0.06083112]
 [-1.0673756  -0.00469454]
 [-0.75400291 -0.04806704]
 [-0.86843548 -0.02975838]
 [-0.40819671  0.0676523 ]
 [-0.91071893  0.01536018]
 [-0.03994666  0.04611755]
 [-0.43685338  0.02995514]
 [-0.22438025  0.02209474]
 [ 0.40879559  0.02472314]
 [-0.87256496 -0.06656697]
 [ 0.5543826  -0.06106608]
 [ 0.2502327   0.05355559]
 [ 0.3069261  -0.03683163]
 [ 0.03110618 -0.03529785]
 [-0.25221738  0.05269055]
 [-0.67828901 -0.04295455]
 [-0.80496988  0.03479189]
 [-0.30947878 -0.00586192]
 [-0.55912344 -0.05404565]
 [-0.95347379  0.02243578]
 [-1.03458667  0.04357261]
 [-0.84251824  0.05031131]
 [-1.01350889 -0.06000797]
 [-0.9872011  -0.01326568]
 [ 0.53158375  0.0187619 ]
 [ 0.10563374  0.03614629]
 [ 0.46921031  0.0156212 ]
 [-1.08891151  0.01399762]
 [ 0.53859561 -0.02155576]
 [-0.39659795 -0.01057462]
 [-0.90004402 -0.03736243]
 [ 0.1776976   0.04677869]
 [-0.84095086 -0.04568073]
 [-0.43330612 -0.01277961]
 [-0.76074258  0.06099143]
 [-1.08653491 -0.01621972]
 [-1.03920326 -0.03018457]
 [-0.89033435  0.00950131]
 [-0.77659736  0.06250147]
 [ 0.549096    0.04806387]
 [-0.61005334  0.03894289]
 [-0.65378732  0.03489525]
 [ 0.37674345  0.05172322]]
batch train labels [[ 1.0021732 ]
 [ 0.74289175]
 [ 0.94528861]
 [ 1.        ]
 [ 2.59707689]
 [ 0.27452517]
 [ 1.        ]
 [ 0.8526459 ]
 [ 0.10847247]
 [ 0.6045682 ]
 [ 1.05661102]
 [ 1.55615993]
 [ 1.54714103]
 [ 1.31216807]
 [ 1.57595806]
 [ 1.97769483]
 [ 0.93533027]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.27077842]
 [ 3.42616224]
 [ 0.10847247]
 [ 1.04543336]
 [-0.96749015]
 [-0.14023047]
 [ 1.55615993]
 [ 1.        ]
 [ 0.74289175]
 [ 0.93533027]
 [ 1.14473635]
 [ 1.29067028]
 [ 1.36786012]
 [ 1.29067028]
 [ 2.86599393]
 [ 1.        ]
 [-0.96749015]
 [ 0.6045682 ]
 [ 1.00271191]
 [-0.38809582]
 [-0.96749015]
 [ 1.        ]
 [ 0.62209078]
 [ 0.62276976]
 [ 1.17997436]
 [ 1.        ]
 [ 1.        ]
 [ 1.22226744]
 [ 1.22226744]
 [ 1.        ]]
plotting the mu() policy learned by NN
state [-0.58089582  0.        ]
[ 1.81932378]
state [[-0.57964891]
 [ 0.00124694]]
[ 1.91375804]
state [[-0.57706982]
 [ 0.00257909]]
[ 1.99709916]
state [[-0.57309431]
 [ 0.00397551]]
[ 2.07166433]
state [[-0.56774896]
 [ 0.00534536]]
[ 2.12506771]
state [[-0.56107342]
 [ 0.00667552]]
[ 2.17883253]
state [[-0.55311745]
 [ 0.00795599]]
[ 2.23348618]
state [[-0.54394037]
 [ 0.00917709]]
[ 2.28523564]
state [[-0.53361082]
 [ 0.01032956]]
[ 2.32363653]
state [[-0.52220619]
 [ 0.01140464]]
[ 2.34905219]
state [[-0.509812  ]
 [ 0.01239419]]
[ 2.35917687]
state [[-0.49652117]
 [ 0.01329082]]
[ 2.3596251]
state [[-0.4824332 ]
 [ 0.01408796]]
[ 2.36710572]
state [[-0.46765319]
 [ 0.01478   ]]
[ 2.37688589]
state [[-0.4522908 ]
 [ 0.01536238]]
[ 2.36759853]
state [[-0.43645915]
 [ 0.01583164]]
[ 2.33043623]
state [[-0.42027363]
 [ 0.01618551]]
[ 2.28644681]
state [[-0.4038507 ]
 [ 0.01642292]]
[ 2.2359066]
state [[-0.38730669]
 [ 0.01654401]]
[ 2.18049741]
state [[-0.37075663]
 [ 0.01655006]]
[ 2.12368155]
state [[-0.35431314]
 [ 0.01644349]]
[ 2.06231833]
state [[-0.33808541]
 [ 0.01622773]]
[ 2.0020504]
state [[-0.3221783]
 [ 0.0159071]]
[ 1.93383133]
state [[-0.30675775]
 [ 0.01542054]]
[ 1.85469496]
state [[-0.29199651]
 [ 0.01476123]]
[ 1.76500332]
state [[-0.27807087]
 [ 0.01392564]]
[ 1.65945709]
state [[-0.26516518]
 [ 0.0129057 ]]
[ 1.55963469]
state [[-0.25344968]
 [ 0.01171551]]
[ 1.46713078]
state [[-0.24307853]
 [ 0.01037115]]
[ 1.37482369]
state [[-0.23419677]
 [ 0.00888176]]
[ 1.27885473]
state [[-0.22694407]
 [ 0.00725269]]
[ 1.1863178]
state [[-0.22144769]
 [ 0.00549638]]
[ 1.07848406]
state [[-0.21784113]
 [ 0.00360656]]
[ 0.96150911]
state [[-0.21625793]
 [ 0.00158321]]
[ 0.83763039]
state [[-0.21682915]
 [-0.00057123]]
[ 0.70331991]
state [[-0.21968652]
 [-0.00285738]]
[ 0.56370556]
state [[-0.22495662]
 [-0.00527009]]
[ 0.42304748]
state [[-0.23275563]
 [-0.00779902]]
[ 0.3092235]
state [[-0.24316032]
 [-0.01040469]]
[ 0.20991324]
state [[-0.25621891]
 [-0.01305857]]
[ 0.1329256]
state [[-0.27194166]
 [-0.01572276]]
[ 0.1035373]
state [[-0.29027405]
 [-0.0183324 ]]
[ 0.09051277]
state [[-0.31112644]
 [-0.02085238]]
[ 0.06952205]
state [[-0.33439711]
 [-0.02327066]]
[ 0.03729346]
state [[-0.3599745]
 [-0.0255774]]
[-0.00885954]
state [[-0.38773039]
 [-0.02775589]]
[-0.05323765]
state [[-0.41747731]
 [-0.02974692]]
[-0.10727188]
state [[-0.44900677]
 [-0.03152946]]
[-0.13735884]
state [[-0.48209101]
 [-0.03308424]]
[-0.13708818]
state [[-0.51648575]
 [-0.03439474]]
[-0.10453612]
state [[-0.55193383]
 [-0.03544809]]
[-0.03938988]
state [[-0.58816963]
 [-0.03623583]]
[ 0.03343928]
state [[-0.62489074]
 [-0.03672113]]
[ 0.12243668]
state [[-0.66174138]
 [-0.03685064]]
[ 0.22026286]
state [[-0.69836509]
 [-0.03662371]]
[ 0.26814184]
state [[-0.73446918]
 [-0.03610406]]
[ 0.28919235]
state [[-0.76980591]
 [-0.03533673]]
[ 0.31842417]
state [[-0.80414104]
 [-0.03433514]]
[ 0.38093314]
state [[-0.83723092]
 [-0.03308988]]
[ 0.4520362]
state [[-0.86884856]
 [-0.03161763]]
[ 0.54403162]
state [[-0.89877152]
 [-0.02992299]]
[ 0.63801569]
state [[-0.92680025]
 [-0.02802874]]
[ 0.6979593]
state [[-0.95279235]
 [-0.02599209]]
[ 0.74928439]
state [[-0.97663474]
 [-0.02384241]]
[ 0.77056289]
state [[-0.99826241]
 [-0.02162765]]
[ 0.78874934]
state [[-1.01762819]
 [-0.01936579]]
[ 0.80849564]
state [[-1.03469527]
 [-0.01706713]]
[ 0.82790935]
state [[-1.04943621]
 [-0.01474097]]
[ 0.86882484]
state [[-1.06180847]
 [-0.01237221]]
[ 0.94137478]
state [[-1.0717417 ]
 [-0.00993323]]
[ 1.04340994]
state [[-1.07913828]
 [-0.0073966 ]]
[ 1.17848027]
state [[-1.08386791]
 [-0.00472958]]
[ 1.34843516]
state [[-1.08576417]
 [-0.00189626]]
[ 1.53202176]
state [[-1.08464515]
 [ 0.00111905]]
[ 1.72683311]
state [[-1.08031499]
 [ 0.00433012]]
[ 1.91264415]
state [[-1.07258451]
 [ 0.00773043]]
[ 2.07307458]
state [[-1.06136131]
 [ 0.01122319]]
[ 2.20023203]
state [[-1.0466404 ]
 [ 0.01472093]]
[ 2.21054554]
state [[-1.02841949]
 [ 0.01822093]]
[ 2.15501642]
state [[-1.00670254]
 [ 0.02171696]]
[ 2.0921979]
state [[-0.98150402]
 [ 0.02519854]]
[ 1.9900322]
state [[-0.95286387]
 [ 0.02864017]]
[ 1.8717823]
state [[-0.92095137]
 [ 0.03191251]]
[ 1.75496709]
state [[-0.88596106]
 [ 0.03499031]]
[ 1.65352046]
state [[-0.84810406]
 [ 0.03785702]]
[ 1.55516613]
state [[-0.8076247 ]
 [ 0.04047935]]
[ 1.45444739]
state [[-0.76480925]
 [ 0.04281543]]
[ 1.372684]
state [[-0.71996588]
 [ 0.04484339]]
[ 1.32899797]
state [[-0.67340446]
 [ 0.04656142]]
[ 1.32404089]
state [[-0.62543291]
 [ 0.04797156]]
[ 1.39495015]
state [[-0.57631445]
 [ 0.04911844]]
[ 1.50505078]
state [[-0.52629721]
 [ 0.05001722]]
[ 1.60469365]
state [[-0.47565508]
 [ 0.05064215]]
[ 1.66479373]
state [[-0.42470649]
 [ 0.0509486 ]]
[ 1.71213412]
state [[-0.37377661]
 [ 0.05092987]]
[ 1.77431357]
state [[-0.32315865]
 [ 0.05061797]]
[ 1.86287796]
state [[-0.27309212]
 [ 0.05006652]]
[ 1.98583806]
state [[-0.22374663]
 [ 0.04934549]]
[ 2.03380108]
state [[-0.17535877]
 [ 0.04838786]]
[ 1.99540269]
state [[-0.12813747]
 [ 0.0472213 ]]
[ 1.87688804]
state [[-0.08235683]
 [ 0.04578064]]
[ 1.69593716]
state [[-0.03830433]
 [ 0.0440525 ]]
[ 1.54282701]
state [[ 0.00380749]
 [ 0.04211181]]
[ 1.44833934]
state [[ 0.0438678 ]
 [ 0.04006032]]
[ 1.3922466]
state [[ 0.08184198]
 [ 0.03797418]]
[ 1.37090135]
state [[ 0.11776204]
 [ 0.03592006]]
[ 1.38479602]
state [[ 0.1517213 ]
 [ 0.03395925]]
[ 1.39870703]
state [[ 0.18383378]
 [ 0.03211249]]
[ 1.42985797]
state [[ 0.21424678]
 [ 0.030413  ]]
[ 1.47039306]
state [[ 0.24312903]
 [ 0.02888225]]
[ 1.51027083]
state [[ 0.2706576 ]
 [ 0.02752856]]
[ 1.52456641]
state [[ 0.29699057]
 [ 0.02633296]]
[ 1.50884187]
state [[ 0.32226074]
 [ 0.02527016]]
[ 1.49034405]
state [[ 0.34660137]
 [ 0.02434063]]
[ 1.47151089]
state [[ 0.37014753]
 [ 0.02354616]]
[ 1.45483506]
state [[ 0.39303786]
 [ 0.02289034]]
[ 1.43823636]
state [[ 0.41541207]
 [ 0.02237421]]
[ 1.42159891]
state [[ 0.43741065]
 [ 0.02199858]]
[ 1.4024173]
state [[ 0.45917243]
 [ 0.02176177]]
[ 1.38223481]
state [[ 0.48083624]
 [ 0.02166381]]
[ 1.35659468]
episode length using learned policy: 120
result after minibatch no. 3000 : mean squared error: 0.168998673558
batch train data [[ -5.20550924e-01   3.97777355e-02]
 [ -9.76474135e-05   2.64236431e-04]
 [  5.20480606e-01   3.57122871e-02]
 [  5.40856044e-01   2.97238174e-02]
 [ -1.13979703e+00  -3.28613290e-02]
 [ -8.43054142e-01  -1.35612836e-02]
 [  1.69265351e-01   3.16088143e-02]
 [ -6.01981915e-01  -4.35232419e-02]
 [  3.49228601e-01  -3.70572584e-02]
 [ -8.53149822e-02   2.64580409e-02]
 [  1.40875915e-01   4.16086563e-02]
 [ -7.09800284e-01  -2.75220099e-02]
 [ -1.11546071e+00   4.19765158e-02]
 [  1.27236074e-01  -4.44642035e-02]
 [  1.56671445e-02  -3.72749298e-02]
 [  5.60197395e-01   3.22478078e-02]
 [  2.02575314e-01  -2.53324585e-02]
 [ -5.04160219e-02   3.45945370e-03]
 [ -5.80272320e-01  -2.02959307e-02]
 [ -5.03435297e-01   6.49638244e-02]
 [  5.37021647e-01   2.69910082e-02]
 [ -1.09846364e+00   1.57628316e-02]
 [ -8.30064189e-01   1.58578910e-02]
 [  5.84227676e-01   6.15586969e-02]
 [ -9.15037709e-01  -5.62704038e-02]
 [  1.47602416e-01  -6.61400926e-02]
 [  6.35239547e-02   5.01253632e-02]
 [ -8.00131034e-01  -1.15742235e-02]
 [ -8.55596539e-01  -8.73094988e-03]
 [ -4.54147589e-01  -2.82335080e-02]
 [ -7.61915895e-01   9.23759925e-04]
 [ -6.92165077e-01   4.39577311e-02]
 [ -1.14844081e+00  -3.86852576e-02]
 [ -5.85098628e-01   3.27598169e-02]
 [ -5.67300109e-01   5.40443240e-02]
 [  3.96925979e-01   5.83629523e-03]
 [ -9.28160652e-02  -1.78751397e-02]
 [ -1.42999307e-01   9.12169394e-03]
 [ -1.83352426e-01  -6.80946277e-02]
 [ -7.15303444e-01   4.62020350e-02]
 [ -1.08495731e-01  -6.41296411e-03]
 [ -8.92422774e-02   4.86191439e-02]
 [ -2.09182512e-01  -1.59181687e-02]
 [ -1.05637457e-01   2.65178711e-03]
 [  1.37877952e-02   2.74503278e-02]
 [ -3.76256888e-01   5.84476101e-02]
 [ -7.37284669e-01  -3.86796493e-02]
 [ -3.82458561e-01   3.30310496e-02]
 [ -4.98560612e-01   5.56171288e-02]
 [ -9.92002811e-01   4.64755166e-02]]
batch train labels [[ 1.22226744]
 [ 0.94528861]
 [ 1.0021732 ]
 [ 1.0021732 ]
 [ 0.62276976]
 [ 1.14473635]
 [ 1.36786012]
 [-0.14023047]
 [ 1.        ]
 [ 1.1842359 ]
 [ 1.36786012]
 [ 0.92107525]
 [ 1.02836613]
 [ 1.        ]
 [ 0.27077842]
 [ 1.0021732 ]
 [ 0.27452517]
 [ 0.94528861]
 [-0.81585296]
 [ 1.00617072]
 [ 1.29067028]
 [ 2.887229  ]
 [ 1.43051834]
 [ 1.        ]
 [ 0.93533027]
 [ 1.        ]
 [ 1.00271191]
 [ 1.09654035]
 [ 1.14473635]
 [ 0.04829047]
 [ 1.23673655]
 [ 0.61158972]
 [ 0.62276976]
 [ 1.22226744]
 [ 2.06895135]
 [ 1.13224935]
 [ 0.42664555]
 [ 1.45858352]
 [ 0.99939357]
 [ 0.61158972]
 [ 0.81870769]
 [ 1.54714103]
 [ 0.11962489]
 [ 0.94528861]
 [ 1.1842359 ]
 [ 1.05661102]
 [ 0.71581806]
 [ 1.31216807]
 [ 2.06895135]
 [ 0.74289175]]
plotting the mu() policy learned by NN
state [-0.43470072  0.        ]
[ 1.30864716]
state [[ -4.35050935e-01]
 [ -3.50212678e-04]]
[ 1.27257991]
state [[-0.43578491]
 [-0.00073396]]
[ 1.23500371]
state [[-0.43693489]
 [-0.00114997]]
[ 1.19633448]
state [[-0.43853119]
 [-0.00159631]]
[ 1.15851092]
state [[-0.4406001 ]
 [-0.00206891]]
[ 1.12060463]
state [[-0.44316447]
 [-0.00256438]]
[ 1.08309817]
state [[-0.44624317]
 [-0.0030787 ]]
[ 1.04652143]
state [[-0.44985032]
 [-0.00360715]]
[ 1.0115788]
state [[-0.45399451]
 [-0.00414419]]
[ 0.97612923]
state [[-0.45868081]
 [-0.0046863 ]]
[ 0.94302434]
state [[-0.4639079 ]
 [-0.00522709]]
[ 0.91165739]
state [[-0.46966863]
 [-0.00576073]]
[ 0.88381141]
state [[-0.47594824]
 [-0.00627963]]
[ 0.8602249]
state [[-0.4827238 ]
 [-0.00677557]]
[ 0.84166729]
state [[-0.4899635]
 [-0.0072397]]
[ 0.82809836]
state [[-0.49762693]
 [-0.00766343]]
[ 0.82081968]
state [[-0.50566411]
 [-0.00803721]]
[ 0.82085943]
state [[-0.5140149 ]
 [-0.00835079]]
[ 0.82967001]
state [[-0.52260786]
 [-0.00859299]]
[ 0.8472752]
state [[-0.53136098]
 [-0.00875315]]
[ 0.87383938]
state [[-0.54018205]
 [-0.0088211 ]]
[ 0.90992826]
state [[-0.54896891]
 [-0.00878685]]
[ 0.95585507]
state [[-0.5576098]
 [-0.0086409]]
[ 1.00895739]
state [[-0.56598711]
 [-0.0083773 ]]
[ 1.07358718]
state [[-0.57397377]
 [-0.00798666]]
[ 1.15007353]
state [[-0.58143401]
 [-0.00746021]]
[ 1.23622131]
state [[-0.58822644]
 [-0.0067924 ]]
[ 1.33136654]
state [[-0.5942058 ]
 [-0.00597935]]
[ 1.4373343]
state [[-0.59922224]
 [-0.00501642]]
[ 1.54406714]
state [[-0.60313225]
 [-0.00391003]]
[ 1.65149474]
state [[-0.60579991]
 [-0.00266768]]
[ 1.76650262]
state [[-0.60709083]
 [-0.00129089]]
[ 1.86248899]
state [[ -6.06899559e-01]
 [  1.91258499e-04]]
[ 1.94208169]
state [[-0.60514796]
 [ 0.00175161]]
[ 2.02187514]
state [[-0.60179079]
 [ 0.00335715]]
[ 2.09457469]
state [[-0.59685254]
 [ 0.00493823]]
[ 2.16676569]
state [[-0.59036934]
 [ 0.00648322]]
[ 2.23788404]
state [[-0.5823887 ]
 [ 0.00798065]]
[ 2.31296253]
state [[-0.57296938]
 [ 0.0094193 ]]
[ 2.37663436]
state [[-0.56218117]
 [ 0.01078822]]
[ 2.42655993]
state [[-0.5501042 ]
 [ 0.01207695]]
[ 2.45381689]
state [[-0.5368287 ]
 [ 0.01327553]]
[ 2.46789908]
state [[-0.52245396]
 [ 0.01437473]]
[ 2.47434402]
state [[-0.50708783]
 [ 0.01536614]]
[ 2.4858408]
state [[-0.49084547]
 [ 0.01624236]]
[ 2.5186317]
state [[-0.47384837]
 [ 0.0169971 ]]
[ 2.55769348]
state [[-0.45622301]
 [ 0.01762536]]
[ 2.59693074]
state [[-0.43809953]
 [ 0.01812347]]
[ 2.63117266]
state [[-0.41961029]
 [ 0.01848924]]
[ 2.63551855]
state [[-0.40088838]
 [ 0.01872191]]
[ 2.59092569]
state [[-0.38206616]
 [ 0.01882223]]
[ 2.51249838]
state [[-0.3632738 ]
 [ 0.01879235]]
[ 2.42629766]
state [[-0.34463805]
 [ 0.01863574]]
[ 2.34575939]
state [[-0.32628095]
 [ 0.01835709]]
[ 2.2543292]
state [[-0.30831882]
 [ 0.01796213]]
[ 2.17207456]
state [[-0.29086137]
 [ 0.01745746]]
[ 2.08298111]
state [[-0.27401105]
 [ 0.01685034]]
[ 1.98507071]
state [[-0.25787747]
 [ 0.01613357]]
[ 1.88512063]
state [[-0.24264722]
 [ 0.01523025]]
[ 1.77114153]
state [[-0.22851218]
 [ 0.01413503]]
[ 1.66300535]
state [[-0.21564935]
 [ 0.01286284]]
[ 1.57858539]
state [[-0.20420274]
 [ 0.0114466 ]]
[ 1.48044682]
state [[-0.19432107]
 [ 0.00988167]]
[ 1.38916731]
state [[-0.18613732]
 [ 0.00818375]]
[ 1.28972578]
state [[-0.17978409]
 [ 0.00635323]]
[ 1.19272733]
state [[-0.17538324]
 [ 0.00440086]]
[ 1.08814991]
state [[-0.1730561 ]
 [ 0.00232714]]
[ 0.98019272]
state [[ -1.72919348e-01]
 [  1.36750750e-04]]
[ 0.84707701]
state [[-0.17510661]
 [-0.00218726]]
[ 0.7101832]
state [[-0.1797466 ]
 [-0.00463999]]
[ 0.57051575]
state [[-0.18696132]
 [-0.00721472]]
[ 0.46834156]
state [[-0.19682467]
 [-0.00986334]]
[ 0.39071834]
state [[-0.20937398]
 [-0.01254931]]
[ 0.31047231]
state [[-0.22463566]
 [-0.01526167]]
[ 0.28835812]
state [[-0.24256244]
 [-0.01792679]]
[ 0.28829807]
state [[-0.26306772]
 [-0.02050528]]
[ 0.28496653]
state [[-0.28604907]
 [-0.02298134]]
[ 0.25814003]
state [[-0.31140688]
 [-0.0253578 ]]
[ 0.22311454]
state [[-0.33902767]
 [-0.0276208 ]]
[ 0.1794907]
state [[-0.36878359]
 [-0.02975593]]
[ 0.12750363]
state [[-0.40053183]
 [-0.03174825]]
[ 0.06627749]
state [[-0.43411598]
 [-0.03358414]]
[ 0.01001416]
state [[-0.4693532 ]
 [-0.03523722]]
[-0.02759214]
state [[-0.50599545]
 [-0.03664227]]
[-0.02314813]
state [[-0.54376966]
 [-0.03777423]]
[ 0.02637456]
state [[-0.58236635]
 [-0.03859667]]
[ 0.10576288]
state [[-0.62141877]
 [-0.03905243]]
[ 0.18078502]
state [[-0.66056722]
 [-0.03914848]]
[ 0.24605714]
state [[-0.69947106]
 [-0.03890382]]
[ 0.31742364]
state [[-0.73779875]
 [-0.03832771]]
[ 0.36136079]
state [[-0.77526689]
 [-0.03746815]]
[ 0.38471752]
state [[-0.8116371]
 [-0.0363702]]
[ 0.41699272]
state [[-0.84668899]
 [-0.03505191]]
[ 0.44173235]
state [[-0.880238  ]
 [-0.03354899]]
[ 0.50330102]
state [[-0.91209078]
 [-0.03185279]]
[ 0.54640472]
state [[-0.94209975]
 [-0.03000894]]
[ 0.56269199]
state [[-0.97016925]
 [-0.02806949]]
[ 0.58120555]
state [[-0.99622399]
 [-0.02605474]]
[ 0.6075449]
state [[-1.02020037]
 [-0.02397637]]
[ 0.62921655]
state [[-1.04205573]
 [-0.02185534]]
[ 0.64201403]
state [[-1.06176937]
 [-0.01971363]]
[ 0.65549219]
state [[-1.07932985]
 [-0.01756052]]
[ 0.66871083]
state [[-1.09473324]
 [-0.01540342]]
[ 0.68322361]
state [[-1.10797882]
 [-0.01324557]]
[ 0.70507103]
state [[-1.11906075]
 [-0.01108195]]
[ 0.77704406]
state [[-1.12792349]
 [-0.00886278]]
[ 0.91367543]
state [[-1.13444555]
 [-0.00652206]]
[ 1.08935845]
state [[-1.13846338]
 [-0.00401785]]
[ 1.27234459]
state [[-1.13980198]
 [-0.00133863]]
[ 1.46314979]
state [[-1.13827336]
 [ 0.00152867]]
[ 1.66237783]
state [[-1.1336751 ]
 [ 0.00459831]]
[ 1.86970806]
state [[-1.12579072]
 [ 0.00788435]]
[ 2.06766891]
state [[-1.11447549]
 [ 0.01131519]]
[ 2.21565843]
state [[-1.09971106]
 [ 0.01476444]]
[ 2.20532417]
state [[-1.08147764]
 [ 0.01823348]]
[ 2.15804601]
state [[-1.05975735]
 [ 0.02172027]]
[ 2.06319332]
state [[-1.03453887]
 [ 0.0252185 ]]
[ 1.92785811]
state [[-1.0058943 ]
 [ 0.02864455]]
[ 1.79090047]
state [[-0.97397804]
 [ 0.03191629]]
[ 1.65021968]
state [[-0.93897158]
 [ 0.03500644]]
[ 1.51060581]
state [[-0.90108514]
 [ 0.03788643]]
[ 1.37812996]
state [[-0.86055696]
 [ 0.0405282 ]]
[ 1.27347398]
state [[-0.81763703]
 [ 0.04291992]]
[ 1.18232012]
state [[-0.77260458]
 [ 0.04503245]]
[ 1.14114165]
state [[-0.72573239]
 [ 0.04687222]]
[ 1.1472733]
state [[-0.67728812]
 [ 0.04844428]]
[ 1.24019098]
state [[-0.62749135]
 [ 0.04979673]]
[ 1.34273553]
state [[-0.57658523]
 [ 0.05090611]]
[ 1.45488405]
state [[-0.52482849]
 [ 0.05175672]]
[ 1.52800393]
state [[-0.47253454]
 [ 0.05229395]]
[ 1.60658193]
state [[-0.42001548]
 [ 0.05251904]]
[ 1.66818333]
state [[-0.36759269]
 [ 0.0524228 ]]
[ 1.70074797]
state [[-0.31559694]
 [ 0.05199575]]
[ 1.78377748]
state [[-0.26427814]
 [ 0.0513188 ]]
[ 1.94049048]
state [[-0.21377343]
 [ 0.05050471]]
[ 2.0782547]
state [[-0.16427198]
 [ 0.04950145]]
[ 2.09727001]
state [[-0.11597304]
 [ 0.04829894]]
[ 1.93076444]
state [[-0.06909355]
 [ 0.04687949]]
[ 1.69170165]
state [[-0.02396884]
 [ 0.04512471]]
[ 1.52474213]
state [[ 0.01918707]
 [ 0.04315591]]
[ 1.43006253]
state [[ 0.06027719]
 [ 0.04109012]]
[ 1.35217357]
state [[ 0.09926024]
 [ 0.03898305]]
[ 1.31643128]
state [[ 0.13616975]
 [ 0.03690951]]
[ 1.29315615]
state [[ 0.17107813]
 [ 0.03490838]]
[ 1.30407405]
state [[ 0.20411268]
 [ 0.03303455]]
[ 1.30577326]
state [[ 0.23540723]
 [ 0.03129456]]
[ 1.3211267]
state [[ 0.26512086]
 [ 0.02971364]]
[ 1.34163809]
state [[ 0.29342607]
 [ 0.02830521]]
[ 1.34916925]
state [[ 0.3204881 ]
 [ 0.02706204]]
[ 1.34545207]
state [[ 0.34646478]
 [ 0.02597669]]
[ 1.33424473]
state [[ 0.37150887]
 [ 0.02504408]]
[ 1.32211137]
state [[ 0.39577356]
 [ 0.02426469]]
[ 1.30933046]
state [[ 0.4194122 ]
 [ 0.02363865]]
[ 1.29562449]
state [[ 0.44257772]
 [ 0.02316554]]
[ 1.27948022]
state [[ 0.46542105]
 [ 0.02284333]]
[ 1.26119518]
state [[ 0.48809144]
 [ 0.0226704 ]]
[ 1.24144125]
episode length using learned policy: 157

In [ ]: