In [1]:
import mountain_car_montecarlo_pg_linear as mcpg

In [2]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Testing with regular deterministic checks

Using linear features without a bias term


In [18]:
# Instantiate the mountain-car learner; init_alpha is presumably the initial
# learning rate (the training log below reports a decaying learning rate).
car1 = mcpg.mountain_car(init_alpha=1e-3)


[2016-05-15 15:15:04,476] Making new env: MountainCar-v0

In [19]:
# Run training with iter=1000 and save=True under the given data name.
# The call is the cell's last expression, so its return value is displayed.
car1.train(
    iter=1000,
    dataname='alpha_1e-3_feature2_schedule_50',
    save=True,
)


EPISODE #10.0
with a exploration of 84.7457627119%
and learning rate of 0.000333333333333
lasted 2611 steps
theta
[[ 0.77620391 -0.57578263 -0.20042128]
 [-0.71084185 -0.32222901  1.03307086]]

EPISODE #20.0
with a exploration of 72.4637681159%
and learning rate of 0.000229415733871
lasted 1511 steps
theta
[[ 0.84555875 -0.45466554 -0.39089321]
 [-0.91590881 -0.42922704  1.34513586]]

EPISODE #30.0
with a exploration of 63.2911392405%
and learning rate of 0.000185695338177
lasted 2259 steps
theta
[[ 0.76985668 -0.33228628 -0.43757039]
 [-1.08514913 -0.38189019  1.46703932]]

EPISODE #40.0
with a exploration of 56.1797752809%
and learning rate of 0.000160128153805
lasted 1296 steps
theta
[[ 0.77188386 -0.3118855  -0.45999836]
 [-1.16678854 -0.32316812  1.48995666]]

EPISODE #50.0
with a exploration of 50.5050505051%
and learning rate of 0.000142857142857
lasted 972 steps
theta
[[ 0.79142005 -0.32951745 -0.4619026 ]
 [-1.18866802 -0.35998428  1.54865229]]

EPISODE #60.0
with a exploration of 45.871559633%
and learning rate of 0.000130188910981
lasted 672 steps
theta
[[ 0.77539324 -0.31383504 -0.46155821]
 [-1.21619223 -0.37770635  1.59389858]]

EPISODE #70.0
with a exploration of 42.0168067227%
and learning rate of 0.000120385853086
lasted 815 steps
theta
[[ 0.78112481 -0.37413943 -0.40698538]
 [-1.17827455 -0.44452815  1.62280271]]

EPISODE #80.0
with a exploration of 38.7596899225%
and learning rate of 0.000112508790093
lasted 2004 steps
theta
[[ 0.74063189 -0.3572525  -0.3833794 ]
 [-1.213462   -0.4135937   1.62705571]]

EPISODE #90.0
with a exploration of 35.9712230216%
and learning rate of 0.000105999788001
lasted 1327 steps
theta
[[ 0.73168615 -0.27862593 -0.45306022]
 [-1.22266188 -0.40336846  1.62603034]]

EPISODE #100.0
with a exploration of 33.5570469799%
and learning rate of 0.000100503781526
lasted 843 steps
theta
[[ 0.62621087 -0.23369859 -0.39251228]
 [-1.25739894 -0.39366011  1.65105905]]

EPISODE #110.0
with a exploration of 31.4465408805%
and learning rate of 9.57826285221e-05
lasted 1290 steps
theta
[[ 0.60923567 -0.229025   -0.38021067]
 [-1.28809578 -0.3808642   1.66895998]]

EPISODE #120.0
with a exploration of 29.5857988166%
and learning rate of 9.16698497028e-05
lasted 810 steps
theta
[[ 0.59038136 -0.21612029 -0.37426107]
 [-1.27528688 -0.36852976  1.64381664]]

EPISODE #130.0
with a exploration of 27.9329608939%
and learning rate of 8.80450906326e-05
lasted 1240 steps
theta
[[ 0.58670655 -0.18869642 -0.39801013]
 [-1.29256748 -0.37311023  1.6656777 ]]

EPISODE #140.0
with a exploration of 26.455026455%
and learning rate of 8.4818892968e-05
lasted 1022 steps
theta
[[ 0.58473067 -0.19796432 -0.38676636]
 [-1.31165531 -0.38298786  1.69464317]]

EPISODE #150.0
with a exploration of 25.1256281407%
and learning rate of 8.19231920519e-05
lasted 491 steps
theta
[[ 0.58325984 -0.16677664 -0.4164832 ]
 [-1.3056862  -0.38234026  1.68802646]]

EPISODE #160.0
with a exploration of 23.9234449761%
and learning rate of 7.93051585718e-05
lasted 401 steps
theta
[[ 0.60138443 -0.1481592  -0.45322523]
 [-1.32413627 -0.36661215  1.69074842]]

EPISODE #170.0
with a exploration of 22.8310502283%
and learning rate of 7.69230769231e-05
lasted 834 steps
theta
[[ 0.58889088 -0.13978231 -0.44910857]
 [-1.33454797 -0.37160049  1.70614846]]

EPISODE #180.0
with a exploration of 21.8340611354%
and learning rate of 7.47435092752e-05
lasted 923 steps
theta
[[ 0.55856205 -0.14760932 -0.41095274]
 [-1.33793388 -0.39846707  1.73640095]]

EPISODE #190.0
with a exploration of 20.9205020921%
and learning rate of 7.27392967453e-05
lasted 731 steps
theta
[[ 0.57440306 -0.12675272 -0.44765034]
 [-1.35203977 -0.41002902  1.76206879]]

EPISODE #200.0
with a exploration of 20.0803212851%
and learning rate of 7.08881205008e-05
lasted 617 steps
theta
[[ 0.58424427 -0.11979724 -0.46444703]
 [-1.36259472 -0.4092865   1.77188121]]

EPISODE #210.0
with a exploration of 19.305019305%
and learning rate of 6.91714463866e-05
lasted 595 steps
theta
[[ 0.58051146 -0.09010382 -0.49040764]
 [-1.37768966 -0.41824269  1.79593235]]

EPISODE #220.0
with a exploration of 18.5873605948%
and learning rate of 6.75737378399e-05
lasted 534 steps
theta
[[ 0.5618192  -0.07723904 -0.48458016]
 [-1.40314094 -0.41053852  1.81367945]]

EPISODE #230.0
with a exploration of 17.9211469534%
and learning rate of 6.60818600455e-05
lasted 754 steps
theta
[[ 0.56760759 -0.06342291 -0.50418468]
 [-1.40194868 -0.41798605  1.81993473]]

EPISODE #240.0
with a exploration of 17.3010380623%
and learning rate of 6.46846227353e-05
lasted 550 steps
theta
[[ 0.56117423 -0.07381393 -0.48736031]
 [-1.39525597 -0.43618483  1.8314408 ]]

EPISODE #250.0
with a exploration of 16.7224080268%
and learning rate of 6.33724250524e-05
lasted 909 steps
theta
[[ 0.54944675 -0.07969613 -0.46975063]
 [-1.41248639 -0.44764105  1.86012744]]

EPISODE #260.0
with a exploration of 16.1812297735%
and learning rate of 6.21369766001e-05
lasted 610 steps
theta
[[ 0.57746023 -0.107717   -0.46974322]
 [-1.42695785 -0.43816554  1.86512339]]

EPISODE #270.0
with a exploration of 15.6739811912%
and learning rate of 6.0971076085e-05
lasted 653 steps
theta
[[ 0.54740075 -0.09104999 -0.45635075]
 [-1.4647128  -0.40811     1.8728228 ]]

EPISODE #280.0
with a exploration of 15.1975683891%
and learning rate of 5.98684340089e-05
lasted 792 steps
theta
[[ 0.54660155 -0.0770425  -0.46955905]
 [-1.48585368 -0.39457616  1.88042984]]

EPISODE #290.0
with a exploration of 14.7492625369%
and learning rate of 5.88235294118e-05
lasted 813 steps
theta
[[ 0.54217206 -0.07859153 -0.46358053]
 [-1.49623819 -0.41225271  1.90849091]]

EPISODE #300.0
with a exploration of 14.3266475645%
and learning rate of 5.78314931966e-05
lasted 595 steps
theta
[[ 0.53326688 -0.07054183 -0.46272505]
 [-1.50550514 -0.41730634  1.92281148]]

EPISODE #310.0
with a exploration of 13.9275766017%
and learning rate of 5.68880123989e-05
lasted 650 steps
theta
[[ 0.54626544 -0.07537857 -0.47088687]
 [-1.49374491 -0.43018338  1.92392829]]

EPISODE #320.0
with a exploration of 13.5501355014%
and learning rate of 5.59892510956e-05
lasted 412 steps
theta
[[ 0.5594136  -0.11571233 -0.44370127]
 [-1.49584594 -0.43746578  1.93331172]]

EPISODE #330.0
with a exploration of 13.1926121372%
and learning rate of 5.5131784642e-05
lasted 864 steps
theta
[[ 0.5425537  -0.08568291 -0.45687079]
 [-1.49689621 -0.42826726  1.92516346]]

EPISODE #340.0
with a exploration of 12.853470437%
and learning rate of 5.43125446594e-05
lasted 596 steps
theta
[[ 0.54449088 -0.10408113 -0.44040976]
 [-1.49157301 -0.44657582  1.93814883]]

EPISODE #350.0
with a exploration of 12.5313283208%
and learning rate of 5.35287727572e-05
lasted 745 steps
theta
[[ 0.54341595 -0.10366727 -0.43974868]
 [-1.50343429 -0.44321699  1.94665128]]

EPISODE #360.0
with a exploration of 12.2249388753%
and learning rate of 5.27779813969e-05
lasted 664 steps
theta
[[ 0.53130777 -0.07181084 -0.45949693]
 [-1.51837368 -0.43356798  1.95194166]]

EPISODE #370.0
with a exploration of 11.9331742243%
and learning rate of 5.20579206295e-05
lasted 885 steps
theta
[[ 0.51699821 -0.05605214 -0.46094608]
 [-1.51751494 -0.43903305  1.95654798]]

EPISODE #380.0
with a exploration of 11.655011655%
and learning rate of 5.13665496938e-05
lasted 696 steps
theta
[[ 0.52644761 -0.07034312 -0.45610449]
 [-1.52013016 -0.4464757   1.96660587]]

EPISODE #390.0
with a exploration of 11.3895216401%
and learning rate of 5.07020126563e-05
lasted 448 steps
theta
[[ 0.52475412 -0.08458279 -0.44017132]
 [-1.53352644 -0.44351728  1.97704372]]

EPISODE #400.0
with a exploration of 11.135857461%
and learning rate of 5.00626174322e-05
lasted 1121 steps
theta
[[ 0.51538128 -0.07137079 -0.4440105 ]
 [-1.54457724 -0.44299641  1.98757364]]

EPISODE #410.0
with a exploration of 10.8932461874%
and learning rate of 4.94468176434e-05
lasted 447 steps
theta
[[ 0.52998823 -0.0734383  -0.45654993]
 [-1.54259528 -0.43892714  1.98152242]]

EPISODE #420.0
with a exploration of 10.6609808102%
and learning rate of 4.88531968746e-05
lasted 698 steps
theta
[[ 0.50951132 -0.04271798 -0.46679334]
 [-1.55202539 -0.43999165  1.99201705]]

EPISODE #430.0
with a exploration of 10.4384133612%
and learning rate of 4.82804549585e-05
lasted 960 steps
theta
[[ 0.50260434 -0.04585933 -0.45674501]
 [-1.56450618 -0.43523961  1.9997458 ]]

EPISODE #440.0
with a exploration of 10.2249488753%
and learning rate of 4.77273959903e-05
lasted 697 steps
theta
[[ 0.49165298 -0.03194207 -0.45971092]
 [-1.57101922 -0.43696997  2.00798919]]

EPISODE #450.0
with a exploration of 10.0200400802%
and learning rate of 4.71929178183e-05
lasted 524 steps
theta
[[ 0.48182901 -0.0334475  -0.44838151]
 [-1.58916588 -0.42735206  2.01651794]]

EPISODE #460.0
with a exploration of 9.8231827112%
and learning rate of 4.66760028009e-05
lasted 381 steps
theta
[[ 0.46841135 -0.02561109 -0.44280026]
 [-1.61164557 -0.42168561  2.03333119]]

EPISODE #470.0
with a exploration of 9.63391136802%
and learning rate of 4.6175709654e-05
lasted 441 steps
theta
[[ 0.43990791 -0.02831404 -0.41159387]
 [-1.62220902 -0.43395926  2.05616829]]

EPISODE #480.0
with a exploration of 9.45179584121%
and learning rate of 4.56911662385e-05
lasted 695 steps
theta
[[ 0.43425713 -0.01978345 -0.41447368]
 [-1.63619226 -0.4215898   2.05778206]]

EPISODE #490.0
with a exploration of 9.27643784787%
and learning rate of 4.52215631646e-05
lasted 717 steps
theta
[[ 0.43014575 -0.02287411 -0.40727164]
 [-1.64820477 -0.42897945  2.07718422]]

EPISODE #500.0
with a exploration of 9.10746812386%
and learning rate of 4.47661481036e-05
lasted 706 steps
theta
[[ 0.42029411 -0.00811168 -0.41218243]
 [-1.65252815 -0.42987751  2.08240565]]

EPISODE #510.0
with a exploration of 8.94454382826%
and learning rate of 4.43242207178e-05
lasted 737 steps
theta
[[ 0.42319839 -0.004036   -0.41916239]
 [-1.65608378 -0.42854424  2.08462802]]

EPISODE #520.0
with a exploration of 8.78734622144%
and learning rate of 4.38951281306e-05
lasted 575 steps
theta
[[ 0.40888545 -0.00916538 -0.39972007]
 [-1.65910879 -0.43282139  2.09193017]]

EPISODE #530.0
with a exploration of 8.63557858377%
and learning rate of 4.34782608696e-05
lasted 762 steps
theta
[[ 0.40942984 -0.00769773 -0.40173211]
 [-1.66172447 -0.43610155  2.09782602]]

EPISODE #540.0
with a exploration of 8.48896434635%
and learning rate of 4.30730492254e-05
lasted 474 steps
theta
[[ 0.41103468 -0.00623718 -0.4047975 ]
 [-1.67501128 -0.43965406  2.11466535]]

EPISODE #550.0
with a exploration of 8.34724540902%
and learning rate of 4.26789599776e-05
lasted 528 steps
theta
[[  4.00858034e-01  -7.21421132e-04  -4.00136613e-01]
 [ -1.67400613e+00  -4.46299604e-01   2.12030573e+00]]

EPISODE #560.0
with a exploration of 8.21018062397%
and learning rate of 4.22954934438e-05
lasted 424 steps
theta
[[ 0.41017936 -0.00614094 -0.40403842]
 [-1.68093312 -0.44058561  2.12151872]]

EPISODE #570.0
with a exploration of 8.07754442649%
and learning rate of 4.1922180815e-05
lasted 372 steps
theta
[[  4.12592635e-01  -1.53916737e-04  -4.12438719e-01]
 [ -1.68101184e+00  -4.45464672e-01   2.12647651e+00]]

EPISODE #580.0
with a exploration of 7.94912559618%
and learning rate of 4.15585817462e-05
lasted 736 steps
theta
[[ 0.42654684  0.00439489 -0.43094173]
 [-1.68522178 -0.44935289  2.13457467]]

EPISODE #590.0
with a exploration of 7.82472613459%
and learning rate of 4.12042821715e-05
lasted 431 steps
theta
[[ 0.42665385  0.01129158 -0.43794542]
 [-1.68743311 -0.45541922  2.14285233]]

EPISODE #600.0
with a exploration of 7.70416024653%
and learning rate of 4.08588923223e-05
lasted 843 steps
theta
[[ 0.42895728  0.02295815 -0.45191543]
 [-1.69320697 -0.46812175  2.16132872]]

EPISODE #610.0
with a exploration of 7.58725341426%
and learning rate of 4.05220449237e-05
lasted 633 steps
theta
[[ 0.43694134  0.0043081  -0.44124944]
 [-1.68917336 -0.47495413  2.16412749]]

EPISODE #620.0
with a exploration of 7.47384155456%
and learning rate of 4.01933935529e-05
lasted 480 steps
theta
[[ 0.44115897  0.0115286  -0.45268757]
 [-1.7023437  -0.47184142  2.17418512]]

EPISODE #630.0
with a exploration of 7.36377025037%
and learning rate of 3.98726111414e-05
lasted 811 steps
theta
[[ 0.4408341   0.00328535 -0.44411945]
 [-1.71545255 -0.46972908  2.18518163]]

EPISODE #640.0
with a exploration of 7.25689404935%
and learning rate of 3.95593886065e-05
lasted 535 steps
theta
[[ 0.45154865  0.01517335 -0.46672201]
 [-1.71763833 -0.48494026  2.20257859]]

EPISODE #650.0
with a exploration of 7.1530758226%
and learning rate of 3.92534335989e-05
lasted 499 steps
theta
[[ 0.43915077  0.01819375 -0.45734452]
 [-1.72685544 -0.47548711  2.20234254]]

EPISODE #660.0
with a exploration of 7.05218617772%
and learning rate of 3.89544693566e-05
lasted 333 steps
theta
[[ 0.4379731   0.02581651 -0.46378962]
 [-1.72576    -0.48747689  2.2132369 ]]

EPISODE #670.0
with a exploration of 6.95410292072%
and learning rate of 3.86622336514e-05
lasted 516 steps
theta
[[ 0.44338116  0.01636397 -0.45974514]
 [-1.72227925 -0.48117911  2.20345836]]

EPISODE #680.0
with a exploration of 6.85871056241%
and learning rate of 3.83764778227e-05
lasted 369 steps
theta
[[ 0.43836757  0.02106587 -0.45943343]
 [-1.72352985 -0.48350429  2.20703414]]

EPISODE #690.0
with a exploration of 6.76589986468%
and learning rate of 3.8096965888e-05
lasted 505 steps
theta
[[ 0.43951915  0.01854206 -0.45806121]
 [-1.73335536 -0.49323651  2.22659187]]

EPISODE #700.0
with a exploration of 6.67556742323%
and learning rate of 3.78234737236e-05
lasted 425 steps
theta
[[ 0.42861409  0.00986498 -0.43847908]
 [-1.73930394 -0.49205246  2.2313564 ]]

EPISODE #710.0
with a exploration of 6.58761528327%
and learning rate of 3.75557883094e-05
lasted 572 steps
theta
[[ 0.42615456  0.00480912 -0.43096368]
 [-1.74373797 -0.49743114  2.24116911]]

EPISODE #720.0
with a exploration of 6.50195058518%
and learning rate of 3.72937070314e-05
lasted 403 steps
theta
[[ 0.42297845  0.01629359 -0.43927203]
 [-1.74897781 -0.50479981  2.25377761]]

EPISODE #730.0
with a exploration of 6.41848523748%
and learning rate of 3.7037037037e-05
lasted 614 steps
theta
[[ 0.41002351  0.03466703 -0.44469054]
 [-1.75207891 -0.5058332   2.25791211]]

EPISODE #740.0
with a exploration of 6.3371356147%
and learning rate of 3.67855946389e-05
lasted 415 steps
theta
[[ 0.39292928  0.04657682 -0.4395061 ]
 [-1.76091698 -0.50641574  2.26733272]]

EPISODE #750.0
with a exploration of 6.25782227785%
and learning rate of 3.65392047621e-05
lasted 340 steps
theta
[[ 0.39198403  0.02617802 -0.41816205]
 [-1.76384585 -0.50788656  2.27173241]]

EPISODE #760.0
with a exploration of 6.1804697157%
and learning rate of 3.6297700432e-05
lasted 524 steps
theta
[[ 0.38957741  0.02873745 -0.41831486]
 [-1.76969431 -0.49664883  2.26634314]]

EPISODE #770.0
with a exploration of 6.10500610501%
and learning rate of 3.60609222987e-05
lasted 561 steps
theta
[[ 0.39100719  0.02194447 -0.41295166]
 [-1.77177956 -0.50116329  2.27294285]]

EPISODE #780.0
with a exploration of 6.03136308806%
and learning rate of 3.5828718195e-05
lasted 586 steps
theta
[[ 0.37841849  0.03353254 -0.41195104]
 [-1.77954586 -0.49345437  2.27300023]]

EPISODE #790.0
with a exploration of 5.95947556615%
and learning rate of 3.56009427254e-05
lasted 478 steps
theta
[[ 0.37562901  0.02409874 -0.39972775]
 [-1.78505604 -0.48711837  2.27217441]]

EPISODE #800.0
with a exploration of 5.88928150766%
and learning rate of 3.53774568839e-05
lasted 535 steps
theta
[[ 0.38745688  0.02339265 -0.41084953]
 [-1.79713147 -0.47554979  2.27268126]]

EPISODE #810.0
with a exploration of 5.8207217695%
and learning rate of 3.51581276967e-05
lasted 345 steps
theta
[[ 0.37309219  0.01914209 -0.39223428]
 [-1.81026543 -0.46579772  2.27606315]]

EPISODE #820.0
with a exploration of 5.75373993096%
and learning rate of 3.49428278907e-05
lasted 802 steps
theta
[[ 0.3868658   0.0165453  -0.4034111 ]
 [-1.81716625 -0.46615116  2.28331741]]

EPISODE #830.0
with a exploration of 5.68828213879%
and learning rate of 3.47314355824e-05
lasted 609 steps
theta
[[ 0.37275548  0.0320186  -0.40477409]
 [-1.81966988 -0.45912587  2.27879575]]

EPISODE #840.0
with a exploration of 5.62429696288%
and learning rate of 3.4523833988e-05
lasted 478 steps
theta
[[ 0.38552464  0.01812354 -0.40364818]
 [-1.83054229 -0.45493989  2.28548218]]

EPISODE #850.0
with a exploration of 5.5617352614%
and learning rate of 3.43199111527e-05
lasted 569 steps
theta
[[ 0.38049215  0.02531611 -0.40580825]
 [-1.84073919 -0.44625136  2.28699055]]

EPISODE #860.0
with a exploration of 5.50055005501%
and learning rate of 3.41195596967e-05
lasted 291 steps
theta
[[ 0.37827985  0.03094081 -0.40922065]
 [-1.84149763 -0.45247645  2.29397408]]

EPISODE #870.0
with a exploration of 5.44069640914%
and learning rate of 3.39226765776e-05
lasted 504 steps
theta
[[ 0.36685542  0.03189078 -0.3987462 ]
 [-1.85078825 -0.45940653  2.31019478]]

EPISODE #880.0
with a exploration of 5.382131324%
and learning rate of 3.37291628677e-05
lasted 431 steps
theta
[[ 0.33580041  0.02543464 -0.36123504]
 [-1.86566444 -0.45703194  2.32269638]]

EPISODE #890.0
with a exploration of 5.32481363152%
and learning rate of 3.35389235453e-05
lasted 409 steps
theta
[[ 0.3408117   0.01573687 -0.35654857]
 [-1.88280119 -0.45043177  2.33323296]]

EPISODE #900.0
with a exploration of 5.26870389884%
and learning rate of 3.33518672983e-05
lasted 538 steps
theta
[[ 0.33913466  0.02786793 -0.3670026 ]
 [-1.88690148 -0.44789834  2.33479982]]

EPISODE #910.0
with a exploration of 5.21376433785%
and learning rate of 3.31679063403e-05
lasted 625 steps
theta
[[ 0.34770197  0.01892751 -0.36662948]
 [-1.8942142  -0.44292195  2.33713615]]

EPISODE #920.0
with a exploration of 5.15995872033%
and learning rate of 3.29869562377e-05
lasted 537 steps
theta
[[  3.61295389e-01  -1.03386147e-03  -3.60261528e-01]
 [ -1.89254720e+00  -4.45402757e-01   2.33794996e+00]]

EPISODE #930.0
with a exploration of 5.10725229826%
and learning rate of 3.28089357462e-05
lasted 486 steps
theta
[[ 0.34954555  0.01280091 -0.36234646]
 [-1.90241031 -0.44734373  2.34975403]]

EPISODE #940.0
with a exploration of 5.05561172902%
and learning rate of 3.26337666582e-05
lasted 370 steps
theta
[[ 0.36189945  0.00953942 -0.37143887]
 [-1.91319554 -0.44234494  2.35554048]]

EPISODE #950.0
with a exploration of 5.00500500501%
and learning rate of 3.24613736581e-05
lasted 626 steps
theta
[[  3.51896665e-01  -2.05300925e-03  -3.49843656e-01]
 [ -1.91925177e+00  -4.43563504e-01   2.36281528e+00]]

EPISODE #960.0
with a exploration of 4.95540138751%
and learning rate of 3.2291684186e-05
lasted 854 steps
theta
[[ 0.33926046  0.00979938 -0.34905984]
 [-1.92169897 -0.44850846  2.37020743]]

EPISODE #970.0
with a exploration of 4.90677134446%
and learning rate of 3.21246283102e-05
lasted 394 steps
theta
[[ 0.34835452  0.00850093 -0.35685545]
 [-1.92277961 -0.4502093   2.37298891]]

EPISODE #980.0
with a exploration of 4.85908649174%
and learning rate of 3.1960138605e-05
lasted 298 steps
theta
[[ 0.35629336 -0.0032418  -0.35305156]
 [-1.92210504 -0.45293521  2.37504025]]

EPISODE #990.0
with a exploration of 4.81231953802%
and learning rate of 3.17981500374e-05
lasted 330 steps
theta
[[ 0.35867186 -0.00638773 -0.35228413]
 [-1.9256154  -0.46170442  2.38731982]]

EPISODE #1000.0
with a exploration of 4.7664442326%
and learning rate of 3.16385998584e-05
lasted 456 steps
theta
[[ 0.35223006 -0.00516924 -0.34706082]
 [-1.92769059 -0.46216227  2.38985286]]

Out[19]:
array([[ 0.35223006, -0.00516924, -0.34706082],
       [-1.92769059, -0.46216227,  2.38985286]])

In [7]:
# NOTE(review): this loads the 'alpha_1e-2' run, while the training cell in
# this notebook saves under 'alpha_1e-3' -- confirm which run is intended.
car1.loaddata(dataname='alpha_1e-2_feature2_schedule_50')


[[ 0.25388317  0.21858183 -0.472465  ]
 [-1.57287585 -0.21461857  1.78749442]]
[13462, 127558, 74528, 148730, 18525, 50972, 4241, 8444, 24491, 1825, 5958, 44107, 4372, 3662, 9045, 10580, 3056, 5752, 3607, 12686, 10526, 7184, 1656, 6008, 2659, 6048, 2337, 5057, 1029, 4072, 1565, 3740, 3601, 753, 3011, 2961, 2492, 3993, 2382, 1633, 1872, 1207, 1540, 2339, 2299, 1684, 964, 1907, 1242, 1776, 1124, 2464, 1362, 1245, 959, 1833, 2670, 1456, 2801, 841, 1502, 1462, 2187, 2083, 803, 1283, 1755, 640, 577, 1276, 1277, 1445, 766, 654, 1695, 1413, 1013, 1510, 882, 1048, 2542, 1139, 868, 1428, 761, 1374, 1648, 1232, 1520, 394, 2329, 849, 923, 1060, 1348, 965, 1358, 1618, 843, 682, 753, 968, 1282, 753, 1692, 916, 753, 957, 1302, 1138, 913, 668, 1046, 938, 869, 564, 602, 1008, 642, 1130, 679, 1110, 657, 728, 1241, 854, 915, 1269, 834, 1016, 665, 985, 720, 1187, 666, 858, 648, 1236, 914, 979, 612, 1365, 1244, 1193, 477, 551, 834, 1062, 487, 1157, 820, 855, 581, 585, 763, 685, 436, 1192, 674, 827, 938, 714, 863, 1013, 1251, 550, 688, 893, 402, 1404, 898, 628, 479, 971, 568, 588, 755, 1230, 1520, 777, 594, 751, 1167, 748, 1025, 596, 827, 1699, 783, 994, 695, 723, 1061, 856, 562, 792, 1148, 1147, 827, 725, 652, 1028, 399, 880, 541, 896, 1294, 948, 792, 653, 1329, 589, 597, 740, 921, 1138, 885, 777, 799, 1299, 853, 1278, 538, 978, 663, 814, 555, 789, 541, 446, 596, 743, 1045, 567, 887, 1050, 475, 771, 361, 1055, 751, 745, 926, 521, 978, 837, 708, 483, 917, 722, 1619, 1068, 611, 370, 710, 621, 740, 739, 1288, 831, 734, 547, 640, 595, 692, 708, 797, 398, 479, 1095, 514, 627, 898, 1056, 705, 492, 501, 864, 715, 761, 555, 1311, 494, 696, 523, 546, 774, 472, 498, 473, 1053, 686, 458, 696, 631, 718, 393, 508, 585, 937, 1091, 620, 1219, 1021, 1026, 400, 603, 320, 926, 375, 1266, 931, 701, 499, 539, 1224, 1186, 635, 464, 563, 655, 509, 536, 990, 802, 679, 815, 1346, 584, 433, 898, 513, 593, 398, 1053, 558, 685, 903, 764, 627, 944, 754, 887, 657, 946, 794, 1077, 514, 911, 703, 439, 802, 1056, 464, 450, 665, 832, 887, 398, 859, 696, 522, 405, 593, 834, 607, 606, 516, 618, 
881, 500, 817, 1180, 577, 327, 640, 809, 518, 1202, 528, 854, 603, 466, 541, 767, 756, 702, 504, 473, 660, 725, 757, 502, 399, 496, 577, 796, 955, 887, 923, 543, 463, 960, 375, 400, 1085, 587, 1051, 543, 1151, 667, 476, 559, 419, 674, 407, 566, 579, 496, 557, 676, 704, 401, 473, 330, 638, 498, 513, 813, 810, 474, 541, 705, 651, 506, 598, 406, 601, 399, 404]
[13462, 127558, 74528, 148730, 18525, 50972, 4241, 8444, 24491, 1825, 5958, 44107, 4372, 3662, 9045, 10580, 3056, 5752, 3607, 12686, 10526, 7184, 1656, 6008, 2659, 6048, 2337, 5057, 1029, 4072, 1565, 3740, 3601, 753, 3011, 2961, 2492, 3993, 2382, 1633, 1872, 1207, 1540, 2339, 2299, 1684, 964, 1907, 1242, 1776, 1124, 2464, 1362, 1245, 959, 1833, 2670, 1456, 2801, 841, 1502, 1462, 2187, 2083, 803, 1283, 1755, 640, 577, 1276, 1277, 1445, 766, 654, 1695, 1413, 1013, 1510, 882, 1048, 2542, 1139, 868, 1428, 761, 1374, 1648, 1232, 1520, 394, 2329, 849, 923, 1060, 1348, 965, 1358, 1618, 843, 682, 753, 968, 1282, 753, 1692, 916, 753, 957, 1302, 1138, 913, 668, 1046, 938, 869, 564, 602, 1008, 642, 1130, 679, 1110, 657, 728, 1241, 854, 915, 1269, 834, 1016, 665, 985, 720, 1187, 666, 858, 648, 1236, 914, 979, 612, 1365, 1244, 1193, 477, 551, 834, 1062, 487, 1157, 820, 855, 581, 585, 763, 685, 436, 1192, 674, 827, 938, 714, 863, 1013, 1251, 550, 688, 893, 402, 1404, 898, 628, 479, 971, 568, 588, 755, 1230, 1520, 777, 594, 751, 1167, 748, 1025, 596, 827, 1699, 783, 994, 695, 723, 1061, 856, 562, 792, 1148, 1147, 827, 725, 652, 1028, 399, 880, 541, 896, 1294, 948, 792, 653, 1329, 589, 597, 740, 921, 1138, 885, 777, 799, 1299, 853, 1278, 538, 978, 663, 814, 555, 789, 541, 446, 596, 743, 1045, 567, 887, 1050, 475, 771, 361, 1055, 751, 745, 926, 521, 978, 837, 708, 483, 917, 722, 1619, 1068, 611, 370, 710, 621, 740, 739, 1288, 831, 734, 547, 640, 595, 692, 708, 797, 398, 479, 1095, 514, 627, 898, 1056, 705, 492, 501, 864, 715, 761, 555, 1311, 494, 696, 523, 546, 774, 472, 498, 473, 1053, 686, 458, 696, 631, 718, 393, 508, 585, 937, 1091, 620, 1219, 1021, 1026, 400, 603, 320, 926, 375, 1266, 931, 701, 499, 539, 1224, 1186, 635, 464, 563, 655, 509, 536, 990, 802, 679, 815, 1346, 584, 433, 898, 513, 593, 398, 1053, 558, 685, 903, 764, 627, 944, 754, 887, 657, 946, 794, 1077, 514, 911, 703, 439, 802, 1056, 464, 450, 665, 832, 887, 398, 859, 696, 522, 405, 593, 834, 607, 606, 516, 618, 
881, 500, 817, 1180, 577, 327, 640, 809, 518, 1202, 528, 854, 603, 466, 541, 767, 756, 702, 504, 473, 660, 725, 757, 502, 399, 496, 577, 796, 955, 887, 923, 543, 463, 960, 375, 400, 1085, 587, 1051, 543, 1151, 667, 476, 559, 419, 674, 407, 566, 579, 496, 557, 676, 704, 401, 473, 330, 638, 498, 513, 813, 810, 474, 541, 705, 651, 506, 598, 406, 601, 399, 404]

In [16]:
# Calls the learner's plot_training method; presumably plots the recorded
# per-episode training history -- TODO confirm against the mcpg module.
car1.plot_training()



In [ ]:
# Manually inspect the first pickled object stored in a saved run.
#
# The original cell had stray method-level indentation, assigned to
# `self.theta` (no `self` exists at notebook top level) and used `cPickle`
# without importing it, so it could not run as written.
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3

# SECURITY: unpickling executes arbitrary code; only load files produced by
# your own training runs.
# The context manager closes the file even if loading raises.
with open('alpha_1e-2_feature2_schedule_50', 'rb') as pkl_file:
    # Presumably the first object in the file is the theta parameter matrix
    # (car1.loaddata prints an array first) -- TODO confirm against mcpg.
    theta = pickle.load(pkl_file)