In [1]:
import cPickle

import mountain_car_montecarlo_pg_linear as mcpg
In [2]:
# Enable the autoreload extension in mode 2 so imported modules are re-imported
# before each cell executes; edits to the mcpg source take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [18]:
# Build the agent used by the cells below; the log line it emits shows that
# construction creates the Gym environment "MountainCar-v0".
# NOTE(review): init_alpha is presumably the initial learning rate (the training
# log reports smaller, decaying rates) -- confirm in mcpg.mountain_car.
car1 = mcpg.mountain_car(init_alpha=1e-3)
[2016-05-15 15:15:04,476] Making new env: MountainCar-v0
In [19]:
# Run 1000 training episodes; progress (exploration, learning rate, episode
# length, theta) is printed every 10 episodes, and the returned array shown as
# the cell output equals the last theta in that log.
# NOTE(review): save=True presumably persists the run under `dataname` --
# confirm in mcpg.mountain_car.train.
car1.train(
    iter=1000,
    dataname='alpha_1e-3_feature2_schedule_50',
    save=True,
)
EPISODE #10.0
with a exploration of 84.7457627119%
and learning rate of 0.000333333333333
lasted 2611 steps
theta
[[ 0.77620391 -0.57578263 -0.20042128]
[-0.71084185 -0.32222901 1.03307086]]
EPISODE #20.0
with a exploration of 72.4637681159%
and learning rate of 0.000229415733871
lasted 1511 steps
theta
[[ 0.84555875 -0.45466554 -0.39089321]
[-0.91590881 -0.42922704 1.34513586]]
EPISODE #30.0
with a exploration of 63.2911392405%
and learning rate of 0.000185695338177
lasted 2259 steps
theta
[[ 0.76985668 -0.33228628 -0.43757039]
[-1.08514913 -0.38189019 1.46703932]]
EPISODE #40.0
with a exploration of 56.1797752809%
and learning rate of 0.000160128153805
lasted 1296 steps
theta
[[ 0.77188386 -0.3118855 -0.45999836]
[-1.16678854 -0.32316812 1.48995666]]
EPISODE #50.0
with a exploration of 50.5050505051%
and learning rate of 0.000142857142857
lasted 972 steps
theta
[[ 0.79142005 -0.32951745 -0.4619026 ]
[-1.18866802 -0.35998428 1.54865229]]
EPISODE #60.0
with a exploration of 45.871559633%
and learning rate of 0.000130188910981
lasted 672 steps
theta
[[ 0.77539324 -0.31383504 -0.46155821]
[-1.21619223 -0.37770635 1.59389858]]
EPISODE #70.0
with a exploration of 42.0168067227%
and learning rate of 0.000120385853086
lasted 815 steps
theta
[[ 0.78112481 -0.37413943 -0.40698538]
[-1.17827455 -0.44452815 1.62280271]]
EPISODE #80.0
with a exploration of 38.7596899225%
and learning rate of 0.000112508790093
lasted 2004 steps
theta
[[ 0.74063189 -0.3572525 -0.3833794 ]
[-1.213462 -0.4135937 1.62705571]]
EPISODE #90.0
with a exploration of 35.9712230216%
and learning rate of 0.000105999788001
lasted 1327 steps
theta
[[ 0.73168615 -0.27862593 -0.45306022]
[-1.22266188 -0.40336846 1.62603034]]
EPISODE #100.0
with a exploration of 33.5570469799%
and learning rate of 0.000100503781526
lasted 843 steps
theta
[[ 0.62621087 -0.23369859 -0.39251228]
[-1.25739894 -0.39366011 1.65105905]]
EPISODE #110.0
with a exploration of 31.4465408805%
and learning rate of 9.57826285221e-05
lasted 1290 steps
theta
[[ 0.60923567 -0.229025 -0.38021067]
[-1.28809578 -0.3808642 1.66895998]]
EPISODE #120.0
with a exploration of 29.5857988166%
and learning rate of 9.16698497028e-05
lasted 810 steps
theta
[[ 0.59038136 -0.21612029 -0.37426107]
[-1.27528688 -0.36852976 1.64381664]]
EPISODE #130.0
with a exploration of 27.9329608939%
and learning rate of 8.80450906326e-05
lasted 1240 steps
theta
[[ 0.58670655 -0.18869642 -0.39801013]
[-1.29256748 -0.37311023 1.6656777 ]]
EPISODE #140.0
with a exploration of 26.455026455%
and learning rate of 8.4818892968e-05
lasted 1022 steps
theta
[[ 0.58473067 -0.19796432 -0.38676636]
[-1.31165531 -0.38298786 1.69464317]]
EPISODE #150.0
with a exploration of 25.1256281407%
and learning rate of 8.19231920519e-05
lasted 491 steps
theta
[[ 0.58325984 -0.16677664 -0.4164832 ]
[-1.3056862 -0.38234026 1.68802646]]
EPISODE #160.0
with a exploration of 23.9234449761%
and learning rate of 7.93051585718e-05
lasted 401 steps
theta
[[ 0.60138443 -0.1481592 -0.45322523]
[-1.32413627 -0.36661215 1.69074842]]
EPISODE #170.0
with a exploration of 22.8310502283%
and learning rate of 7.69230769231e-05
lasted 834 steps
theta
[[ 0.58889088 -0.13978231 -0.44910857]
[-1.33454797 -0.37160049 1.70614846]]
EPISODE #180.0
with a exploration of 21.8340611354%
and learning rate of 7.47435092752e-05
lasted 923 steps
theta
[[ 0.55856205 -0.14760932 -0.41095274]
[-1.33793388 -0.39846707 1.73640095]]
EPISODE #190.0
with a exploration of 20.9205020921%
and learning rate of 7.27392967453e-05
lasted 731 steps
theta
[[ 0.57440306 -0.12675272 -0.44765034]
[-1.35203977 -0.41002902 1.76206879]]
EPISODE #200.0
with a exploration of 20.0803212851%
and learning rate of 7.08881205008e-05
lasted 617 steps
theta
[[ 0.58424427 -0.11979724 -0.46444703]
[-1.36259472 -0.4092865 1.77188121]]
EPISODE #210.0
with a exploration of 19.305019305%
and learning rate of 6.91714463866e-05
lasted 595 steps
theta
[[ 0.58051146 -0.09010382 -0.49040764]
[-1.37768966 -0.41824269 1.79593235]]
EPISODE #220.0
with a exploration of 18.5873605948%
and learning rate of 6.75737378399e-05
lasted 534 steps
theta
[[ 0.5618192 -0.07723904 -0.48458016]
[-1.40314094 -0.41053852 1.81367945]]
EPISODE #230.0
with a exploration of 17.9211469534%
and learning rate of 6.60818600455e-05
lasted 754 steps
theta
[[ 0.56760759 -0.06342291 -0.50418468]
[-1.40194868 -0.41798605 1.81993473]]
EPISODE #240.0
with a exploration of 17.3010380623%
and learning rate of 6.46846227353e-05
lasted 550 steps
theta
[[ 0.56117423 -0.07381393 -0.48736031]
[-1.39525597 -0.43618483 1.8314408 ]]
EPISODE #250.0
with a exploration of 16.7224080268%
and learning rate of 6.33724250524e-05
lasted 909 steps
theta
[[ 0.54944675 -0.07969613 -0.46975063]
[-1.41248639 -0.44764105 1.86012744]]
EPISODE #260.0
with a exploration of 16.1812297735%
and learning rate of 6.21369766001e-05
lasted 610 steps
theta
[[ 0.57746023 -0.107717 -0.46974322]
[-1.42695785 -0.43816554 1.86512339]]
EPISODE #270.0
with a exploration of 15.6739811912%
and learning rate of 6.0971076085e-05
lasted 653 steps
theta
[[ 0.54740075 -0.09104999 -0.45635075]
[-1.4647128 -0.40811 1.8728228 ]]
EPISODE #280.0
with a exploration of 15.1975683891%
and learning rate of 5.98684340089e-05
lasted 792 steps
theta
[[ 0.54660155 -0.0770425 -0.46955905]
[-1.48585368 -0.39457616 1.88042984]]
EPISODE #290.0
with a exploration of 14.7492625369%
and learning rate of 5.88235294118e-05
lasted 813 steps
theta
[[ 0.54217206 -0.07859153 -0.46358053]
[-1.49623819 -0.41225271 1.90849091]]
EPISODE #300.0
with a exploration of 14.3266475645%
and learning rate of 5.78314931966e-05
lasted 595 steps
theta
[[ 0.53326688 -0.07054183 -0.46272505]
[-1.50550514 -0.41730634 1.92281148]]
EPISODE #310.0
with a exploration of 13.9275766017%
and learning rate of 5.68880123989e-05
lasted 650 steps
theta
[[ 0.54626544 -0.07537857 -0.47088687]
[-1.49374491 -0.43018338 1.92392829]]
EPISODE #320.0
with a exploration of 13.5501355014%
and learning rate of 5.59892510956e-05
lasted 412 steps
theta
[[ 0.5594136 -0.11571233 -0.44370127]
[-1.49584594 -0.43746578 1.93331172]]
EPISODE #330.0
with a exploration of 13.1926121372%
and learning rate of 5.5131784642e-05
lasted 864 steps
theta
[[ 0.5425537 -0.08568291 -0.45687079]
[-1.49689621 -0.42826726 1.92516346]]
EPISODE #340.0
with a exploration of 12.853470437%
and learning rate of 5.43125446594e-05
lasted 596 steps
theta
[[ 0.54449088 -0.10408113 -0.44040976]
[-1.49157301 -0.44657582 1.93814883]]
EPISODE #350.0
with a exploration of 12.5313283208%
and learning rate of 5.35287727572e-05
lasted 745 steps
theta
[[ 0.54341595 -0.10366727 -0.43974868]
[-1.50343429 -0.44321699 1.94665128]]
EPISODE #360.0
with a exploration of 12.2249388753%
and learning rate of 5.27779813969e-05
lasted 664 steps
theta
[[ 0.53130777 -0.07181084 -0.45949693]
[-1.51837368 -0.43356798 1.95194166]]
EPISODE #370.0
with a exploration of 11.9331742243%
and learning rate of 5.20579206295e-05
lasted 885 steps
theta
[[ 0.51699821 -0.05605214 -0.46094608]
[-1.51751494 -0.43903305 1.95654798]]
EPISODE #380.0
with a exploration of 11.655011655%
and learning rate of 5.13665496938e-05
lasted 696 steps
theta
[[ 0.52644761 -0.07034312 -0.45610449]
[-1.52013016 -0.4464757 1.96660587]]
EPISODE #390.0
with a exploration of 11.3895216401%
and learning rate of 5.07020126563e-05
lasted 448 steps
theta
[[ 0.52475412 -0.08458279 -0.44017132]
[-1.53352644 -0.44351728 1.97704372]]
EPISODE #400.0
with a exploration of 11.135857461%
and learning rate of 5.00626174322e-05
lasted 1121 steps
theta
[[ 0.51538128 -0.07137079 -0.4440105 ]
[-1.54457724 -0.44299641 1.98757364]]
EPISODE #410.0
with a exploration of 10.8932461874%
and learning rate of 4.94468176434e-05
lasted 447 steps
theta
[[ 0.52998823 -0.0734383 -0.45654993]
[-1.54259528 -0.43892714 1.98152242]]
EPISODE #420.0
with a exploration of 10.6609808102%
and learning rate of 4.88531968746e-05
lasted 698 steps
theta
[[ 0.50951132 -0.04271798 -0.46679334]
[-1.55202539 -0.43999165 1.99201705]]
EPISODE #430.0
with a exploration of 10.4384133612%
and learning rate of 4.82804549585e-05
lasted 960 steps
theta
[[ 0.50260434 -0.04585933 -0.45674501]
[-1.56450618 -0.43523961 1.9997458 ]]
EPISODE #440.0
with a exploration of 10.2249488753%
and learning rate of 4.77273959903e-05
lasted 697 steps
theta
[[ 0.49165298 -0.03194207 -0.45971092]
[-1.57101922 -0.43696997 2.00798919]]
EPISODE #450.0
with a exploration of 10.0200400802%
and learning rate of 4.71929178183e-05
lasted 524 steps
theta
[[ 0.48182901 -0.0334475 -0.44838151]
[-1.58916588 -0.42735206 2.01651794]]
EPISODE #460.0
with a exploration of 9.8231827112%
and learning rate of 4.66760028009e-05
lasted 381 steps
theta
[[ 0.46841135 -0.02561109 -0.44280026]
[-1.61164557 -0.42168561 2.03333119]]
EPISODE #470.0
with a exploration of 9.63391136802%
and learning rate of 4.6175709654e-05
lasted 441 steps
theta
[[ 0.43990791 -0.02831404 -0.41159387]
[-1.62220902 -0.43395926 2.05616829]]
EPISODE #480.0
with a exploration of 9.45179584121%
and learning rate of 4.56911662385e-05
lasted 695 steps
theta
[[ 0.43425713 -0.01978345 -0.41447368]
[-1.63619226 -0.4215898 2.05778206]]
EPISODE #490.0
with a exploration of 9.27643784787%
and learning rate of 4.52215631646e-05
lasted 717 steps
theta
[[ 0.43014575 -0.02287411 -0.40727164]
[-1.64820477 -0.42897945 2.07718422]]
EPISODE #500.0
with a exploration of 9.10746812386%
and learning rate of 4.47661481036e-05
lasted 706 steps
theta
[[ 0.42029411 -0.00811168 -0.41218243]
[-1.65252815 -0.42987751 2.08240565]]
EPISODE #510.0
with a exploration of 8.94454382826%
and learning rate of 4.43242207178e-05
lasted 737 steps
theta
[[ 0.42319839 -0.004036 -0.41916239]
[-1.65608378 -0.42854424 2.08462802]]
EPISODE #520.0
with a exploration of 8.78734622144%
and learning rate of 4.38951281306e-05
lasted 575 steps
theta
[[ 0.40888545 -0.00916538 -0.39972007]
[-1.65910879 -0.43282139 2.09193017]]
EPISODE #530.0
with a exploration of 8.63557858377%
and learning rate of 4.34782608696e-05
lasted 762 steps
theta
[[ 0.40942984 -0.00769773 -0.40173211]
[-1.66172447 -0.43610155 2.09782602]]
EPISODE #540.0
with a exploration of 8.48896434635%
and learning rate of 4.30730492254e-05
lasted 474 steps
theta
[[ 0.41103468 -0.00623718 -0.4047975 ]
[-1.67501128 -0.43965406 2.11466535]]
EPISODE #550.0
with a exploration of 8.34724540902%
and learning rate of 4.26789599776e-05
lasted 528 steps
theta
[[ 4.00858034e-01 -7.21421132e-04 -4.00136613e-01]
[ -1.67400613e+00 -4.46299604e-01 2.12030573e+00]]
EPISODE #560.0
with a exploration of 8.21018062397%
and learning rate of 4.22954934438e-05
lasted 424 steps
theta
[[ 0.41017936 -0.00614094 -0.40403842]
[-1.68093312 -0.44058561 2.12151872]]
EPISODE #570.0
with a exploration of 8.07754442649%
and learning rate of 4.1922180815e-05
lasted 372 steps
theta
[[ 4.12592635e-01 -1.53916737e-04 -4.12438719e-01]
[ -1.68101184e+00 -4.45464672e-01 2.12647651e+00]]
EPISODE #580.0
with a exploration of 7.94912559618%
and learning rate of 4.15585817462e-05
lasted 736 steps
theta
[[ 0.42654684 0.00439489 -0.43094173]
[-1.68522178 -0.44935289 2.13457467]]
EPISODE #590.0
with a exploration of 7.82472613459%
and learning rate of 4.12042821715e-05
lasted 431 steps
theta
[[ 0.42665385 0.01129158 -0.43794542]
[-1.68743311 -0.45541922 2.14285233]]
EPISODE #600.0
with a exploration of 7.70416024653%
and learning rate of 4.08588923223e-05
lasted 843 steps
theta
[[ 0.42895728 0.02295815 -0.45191543]
[-1.69320697 -0.46812175 2.16132872]]
EPISODE #610.0
with a exploration of 7.58725341426%
and learning rate of 4.05220449237e-05
lasted 633 steps
theta
[[ 0.43694134 0.0043081 -0.44124944]
[-1.68917336 -0.47495413 2.16412749]]
EPISODE #620.0
with a exploration of 7.47384155456%
and learning rate of 4.01933935529e-05
lasted 480 steps
theta
[[ 0.44115897 0.0115286 -0.45268757]
[-1.7023437 -0.47184142 2.17418512]]
EPISODE #630.0
with a exploration of 7.36377025037%
and learning rate of 3.98726111414e-05
lasted 811 steps
theta
[[ 0.4408341 0.00328535 -0.44411945]
[-1.71545255 -0.46972908 2.18518163]]
EPISODE #640.0
with a exploration of 7.25689404935%
and learning rate of 3.95593886065e-05
lasted 535 steps
theta
[[ 0.45154865 0.01517335 -0.46672201]
[-1.71763833 -0.48494026 2.20257859]]
EPISODE #650.0
with a exploration of 7.1530758226%
and learning rate of 3.92534335989e-05
lasted 499 steps
theta
[[ 0.43915077 0.01819375 -0.45734452]
[-1.72685544 -0.47548711 2.20234254]]
EPISODE #660.0
with a exploration of 7.05218617772%
and learning rate of 3.89544693566e-05
lasted 333 steps
theta
[[ 0.4379731 0.02581651 -0.46378962]
[-1.72576 -0.48747689 2.2132369 ]]
EPISODE #670.0
with a exploration of 6.95410292072%
and learning rate of 3.86622336514e-05
lasted 516 steps
theta
[[ 0.44338116 0.01636397 -0.45974514]
[-1.72227925 -0.48117911 2.20345836]]
EPISODE #680.0
with a exploration of 6.85871056241%
and learning rate of 3.83764778227e-05
lasted 369 steps
theta
[[ 0.43836757 0.02106587 -0.45943343]
[-1.72352985 -0.48350429 2.20703414]]
EPISODE #690.0
with a exploration of 6.76589986468%
and learning rate of 3.8096965888e-05
lasted 505 steps
theta
[[ 0.43951915 0.01854206 -0.45806121]
[-1.73335536 -0.49323651 2.22659187]]
EPISODE #700.0
with a exploration of 6.67556742323%
and learning rate of 3.78234737236e-05
lasted 425 steps
theta
[[ 0.42861409 0.00986498 -0.43847908]
[-1.73930394 -0.49205246 2.2313564 ]]
EPISODE #710.0
with a exploration of 6.58761528327%
and learning rate of 3.75557883094e-05
lasted 572 steps
theta
[[ 0.42615456 0.00480912 -0.43096368]
[-1.74373797 -0.49743114 2.24116911]]
EPISODE #720.0
with a exploration of 6.50195058518%
and learning rate of 3.72937070314e-05
lasted 403 steps
theta
[[ 0.42297845 0.01629359 -0.43927203]
[-1.74897781 -0.50479981 2.25377761]]
EPISODE #730.0
with a exploration of 6.41848523748%
and learning rate of 3.7037037037e-05
lasted 614 steps
theta
[[ 0.41002351 0.03466703 -0.44469054]
[-1.75207891 -0.5058332 2.25791211]]
EPISODE #740.0
with a exploration of 6.3371356147%
and learning rate of 3.67855946389e-05
lasted 415 steps
theta
[[ 0.39292928 0.04657682 -0.4395061 ]
[-1.76091698 -0.50641574 2.26733272]]
EPISODE #750.0
with a exploration of 6.25782227785%
and learning rate of 3.65392047621e-05
lasted 340 steps
theta
[[ 0.39198403 0.02617802 -0.41816205]
[-1.76384585 -0.50788656 2.27173241]]
EPISODE #760.0
with a exploration of 6.1804697157%
and learning rate of 3.6297700432e-05
lasted 524 steps
theta
[[ 0.38957741 0.02873745 -0.41831486]
[-1.76969431 -0.49664883 2.26634314]]
EPISODE #770.0
with a exploration of 6.10500610501%
and learning rate of 3.60609222987e-05
lasted 561 steps
theta
[[ 0.39100719 0.02194447 -0.41295166]
[-1.77177956 -0.50116329 2.27294285]]
EPISODE #780.0
with a exploration of 6.03136308806%
and learning rate of 3.5828718195e-05
lasted 586 steps
theta
[[ 0.37841849 0.03353254 -0.41195104]
[-1.77954586 -0.49345437 2.27300023]]
EPISODE #790.0
with a exploration of 5.95947556615%
and learning rate of 3.56009427254e-05
lasted 478 steps
theta
[[ 0.37562901 0.02409874 -0.39972775]
[-1.78505604 -0.48711837 2.27217441]]
EPISODE #800.0
with a exploration of 5.88928150766%
and learning rate of 3.53774568839e-05
lasted 535 steps
theta
[[ 0.38745688 0.02339265 -0.41084953]
[-1.79713147 -0.47554979 2.27268126]]
EPISODE #810.0
with a exploration of 5.8207217695%
and learning rate of 3.51581276967e-05
lasted 345 steps
theta
[[ 0.37309219 0.01914209 -0.39223428]
[-1.81026543 -0.46579772 2.27606315]]
EPISODE #820.0
with a exploration of 5.75373993096%
and learning rate of 3.49428278907e-05
lasted 802 steps
theta
[[ 0.3868658 0.0165453 -0.4034111 ]
[-1.81716625 -0.46615116 2.28331741]]
EPISODE #830.0
with a exploration of 5.68828213879%
and learning rate of 3.47314355824e-05
lasted 609 steps
theta
[[ 0.37275548 0.0320186 -0.40477409]
[-1.81966988 -0.45912587 2.27879575]]
EPISODE #840.0
with a exploration of 5.62429696288%
and learning rate of 3.4523833988e-05
lasted 478 steps
theta
[[ 0.38552464 0.01812354 -0.40364818]
[-1.83054229 -0.45493989 2.28548218]]
EPISODE #850.0
with a exploration of 5.5617352614%
and learning rate of 3.43199111527e-05
lasted 569 steps
theta
[[ 0.38049215 0.02531611 -0.40580825]
[-1.84073919 -0.44625136 2.28699055]]
EPISODE #860.0
with a exploration of 5.50055005501%
and learning rate of 3.41195596967e-05
lasted 291 steps
theta
[[ 0.37827985 0.03094081 -0.40922065]
[-1.84149763 -0.45247645 2.29397408]]
EPISODE #870.0
with a exploration of 5.44069640914%
and learning rate of 3.39226765776e-05
lasted 504 steps
theta
[[ 0.36685542 0.03189078 -0.3987462 ]
[-1.85078825 -0.45940653 2.31019478]]
EPISODE #880.0
with a exploration of 5.382131324%
and learning rate of 3.37291628677e-05
lasted 431 steps
theta
[[ 0.33580041 0.02543464 -0.36123504]
[-1.86566444 -0.45703194 2.32269638]]
EPISODE #890.0
with a exploration of 5.32481363152%
and learning rate of 3.35389235453e-05
lasted 409 steps
theta
[[ 0.3408117 0.01573687 -0.35654857]
[-1.88280119 -0.45043177 2.33323296]]
EPISODE #900.0
with a exploration of 5.26870389884%
and learning rate of 3.33518672983e-05
lasted 538 steps
theta
[[ 0.33913466 0.02786793 -0.3670026 ]
[-1.88690148 -0.44789834 2.33479982]]
EPISODE #910.0
with a exploration of 5.21376433785%
and learning rate of 3.31679063403e-05
lasted 625 steps
theta
[[ 0.34770197 0.01892751 -0.36662948]
[-1.8942142 -0.44292195 2.33713615]]
EPISODE #920.0
with a exploration of 5.15995872033%
and learning rate of 3.29869562377e-05
lasted 537 steps
theta
[[ 3.61295389e-01 -1.03386147e-03 -3.60261528e-01]
[ -1.89254720e+00 -4.45402757e-01 2.33794996e+00]]
EPISODE #930.0
with a exploration of 5.10725229826%
and learning rate of 3.28089357462e-05
lasted 486 steps
theta
[[ 0.34954555 0.01280091 -0.36234646]
[-1.90241031 -0.44734373 2.34975403]]
EPISODE #940.0
with a exploration of 5.05561172902%
and learning rate of 3.26337666582e-05
lasted 370 steps
theta
[[ 0.36189945 0.00953942 -0.37143887]
[-1.91319554 -0.44234494 2.35554048]]
EPISODE #950.0
with a exploration of 5.00500500501%
and learning rate of 3.24613736581e-05
lasted 626 steps
theta
[[ 3.51896665e-01 -2.05300925e-03 -3.49843656e-01]
[ -1.91925177e+00 -4.43563504e-01 2.36281528e+00]]
EPISODE #960.0
with a exploration of 4.95540138751%
and learning rate of 3.2291684186e-05
lasted 854 steps
theta
[[ 0.33926046 0.00979938 -0.34905984]
[-1.92169897 -0.44850846 2.37020743]]
EPISODE #970.0
with a exploration of 4.90677134446%
and learning rate of 3.21246283102e-05
lasted 394 steps
theta
[[ 0.34835452 0.00850093 -0.35685545]
[-1.92277961 -0.4502093 2.37298891]]
EPISODE #980.0
with a exploration of 4.85908649174%
and learning rate of 3.1960138605e-05
lasted 298 steps
theta
[[ 0.35629336 -0.0032418 -0.35305156]
[-1.92210504 -0.45293521 2.37504025]]
EPISODE #990.0
with a exploration of 4.81231953802%
and learning rate of 3.17981500374e-05
lasted 330 steps
theta
[[ 0.35867186 -0.00638773 -0.35228413]
[-1.9256154 -0.46170442 2.38731982]]
EPISODE #1000.0
with a exploration of 4.7664442326%
and learning rate of 3.16385998584e-05
lasted 456 steps
theta
[[ 0.35223006 -0.00516924 -0.34706082]
[-1.92769059 -0.46216227 2.38985286]]
Out[19]:
array([[ 0.35223006, -0.00516924, -0.34706082],
[-1.92769059, -0.46216227, 2.38985286]])
In [7]:
# Load a previously saved run into car1; the call prints a 2x3 array followed
# by a long list of integers (shown twice in the output).
# NOTE(review): this loads the 'alpha_1e-2' run, whereas the training cell above
# uses dataname 'alpha_1e-3'; the execution counts also show this cell ran
# before that training call -- confirm which run is intended for the plot.
car1.loaddata(dataname='alpha_1e-2_feature2_schedule_50')
[[ 0.25388317 0.21858183 -0.472465 ]
[-1.57287585 -0.21461857 1.78749442]]
[13462, 127558, 74528, 148730, 18525, 50972, 4241, 8444, 24491, 1825, 5958, 44107, 4372, 3662, 9045, 10580, 3056, 5752, 3607, 12686, 10526, 7184, 1656, 6008, 2659, 6048, 2337, 5057, 1029, 4072, 1565, 3740, 3601, 753, 3011, 2961, 2492, 3993, 2382, 1633, 1872, 1207, 1540, 2339, 2299, 1684, 964, 1907, 1242, 1776, 1124, 2464, 1362, 1245, 959, 1833, 2670, 1456, 2801, 841, 1502, 1462, 2187, 2083, 803, 1283, 1755, 640, 577, 1276, 1277, 1445, 766, 654, 1695, 1413, 1013, 1510, 882, 1048, 2542, 1139, 868, 1428, 761, 1374, 1648, 1232, 1520, 394, 2329, 849, 923, 1060, 1348, 965, 1358, 1618, 843, 682, 753, 968, 1282, 753, 1692, 916, 753, 957, 1302, 1138, 913, 668, 1046, 938, 869, 564, 602, 1008, 642, 1130, 679, 1110, 657, 728, 1241, 854, 915, 1269, 834, 1016, 665, 985, 720, 1187, 666, 858, 648, 1236, 914, 979, 612, 1365, 1244, 1193, 477, 551, 834, 1062, 487, 1157, 820, 855, 581, 585, 763, 685, 436, 1192, 674, 827, 938, 714, 863, 1013, 1251, 550, 688, 893, 402, 1404, 898, 628, 479, 971, 568, 588, 755, 1230, 1520, 777, 594, 751, 1167, 748, 1025, 596, 827, 1699, 783, 994, 695, 723, 1061, 856, 562, 792, 1148, 1147, 827, 725, 652, 1028, 399, 880, 541, 896, 1294, 948, 792, 653, 1329, 589, 597, 740, 921, 1138, 885, 777, 799, 1299, 853, 1278, 538, 978, 663, 814, 555, 789, 541, 446, 596, 743, 1045, 567, 887, 1050, 475, 771, 361, 1055, 751, 745, 926, 521, 978, 837, 708, 483, 917, 722, 1619, 1068, 611, 370, 710, 621, 740, 739, 1288, 831, 734, 547, 640, 595, 692, 708, 797, 398, 479, 1095, 514, 627, 898, 1056, 705, 492, 501, 864, 715, 761, 555, 1311, 494, 696, 523, 546, 774, 472, 498, 473, 1053, 686, 458, 696, 631, 718, 393, 508, 585, 937, 1091, 620, 1219, 1021, 1026, 400, 603, 320, 926, 375, 1266, 931, 701, 499, 539, 1224, 1186, 635, 464, 563, 655, 509, 536, 990, 802, 679, 815, 1346, 584, 433, 898, 513, 593, 398, 1053, 558, 685, 903, 764, 627, 944, 754, 887, 657, 946, 794, 1077, 514, 911, 703, 439, 802, 1056, 464, 450, 665, 832, 887, 398, 859, 696, 522, 405, 593, 834, 607, 606, 516, 618, 
881, 500, 817, 1180, 577, 327, 640, 809, 518, 1202, 528, 854, 603, 466, 541, 767, 756, 702, 504, 473, 660, 725, 757, 502, 399, 496, 577, 796, 955, 887, 923, 543, 463, 960, 375, 400, 1085, 587, 1051, 543, 1151, 667, 476, 559, 419, 674, 407, 566, 579, 496, 557, 676, 704, 401, 473, 330, 638, 498, 513, 813, 810, 474, 541, 705, 651, 506, 598, 406, 601, 399, 404]
[13462, 127558, 74528, 148730, 18525, 50972, 4241, 8444, 24491, 1825, 5958, 44107, 4372, 3662, 9045, 10580, 3056, 5752, 3607, 12686, 10526, 7184, 1656, 6008, 2659, 6048, 2337, 5057, 1029, 4072, 1565, 3740, 3601, 753, 3011, 2961, 2492, 3993, 2382, 1633, 1872, 1207, 1540, 2339, 2299, 1684, 964, 1907, 1242, 1776, 1124, 2464, 1362, 1245, 959, 1833, 2670, 1456, 2801, 841, 1502, 1462, 2187, 2083, 803, 1283, 1755, 640, 577, 1276, 1277, 1445, 766, 654, 1695, 1413, 1013, 1510, 882, 1048, 2542, 1139, 868, 1428, 761, 1374, 1648, 1232, 1520, 394, 2329, 849, 923, 1060, 1348, 965, 1358, 1618, 843, 682, 753, 968, 1282, 753, 1692, 916, 753, 957, 1302, 1138, 913, 668, 1046, 938, 869, 564, 602, 1008, 642, 1130, 679, 1110, 657, 728, 1241, 854, 915, 1269, 834, 1016, 665, 985, 720, 1187, 666, 858, 648, 1236, 914, 979, 612, 1365, 1244, 1193, 477, 551, 834, 1062, 487, 1157, 820, 855, 581, 585, 763, 685, 436, 1192, 674, 827, 938, 714, 863, 1013, 1251, 550, 688, 893, 402, 1404, 898, 628, 479, 971, 568, 588, 755, 1230, 1520, 777, 594, 751, 1167, 748, 1025, 596, 827, 1699, 783, 994, 695, 723, 1061, 856, 562, 792, 1148, 1147, 827, 725, 652, 1028, 399, 880, 541, 896, 1294, 948, 792, 653, 1329, 589, 597, 740, 921, 1138, 885, 777, 799, 1299, 853, 1278, 538, 978, 663, 814, 555, 789, 541, 446, 596, 743, 1045, 567, 887, 1050, 475, 771, 361, 1055, 751, 745, 926, 521, 978, 837, 708, 483, 917, 722, 1619, 1068, 611, 370, 710, 621, 740, 739, 1288, 831, 734, 547, 640, 595, 692, 708, 797, 398, 479, 1095, 514, 627, 898, 1056, 705, 492, 501, 864, 715, 761, 555, 1311, 494, 696, 523, 546, 774, 472, 498, 473, 1053, 686, 458, 696, 631, 718, 393, 508, 585, 937, 1091, 620, 1219, 1021, 1026, 400, 603, 320, 926, 375, 1266, 931, 701, 499, 539, 1224, 1186, 635, 464, 563, 655, 509, 536, 990, 802, 679, 815, 1346, 584, 433, 898, 513, 593, 398, 1053, 558, 685, 903, 764, 627, 944, 754, 887, 657, 946, 794, 1077, 514, 911, 703, 439, 802, 1056, 464, 450, 665, 832, 887, 398, 859, 696, 522, 405, 593, 834, 607, 606, 516, 618, 
881, 500, 817, 1180, 577, 327, 640, 809, 518, 1202, 528, 854, 603, 466, 541, 767, 756, 702, 504, 473, 660, 725, 757, 502, 399, 496, 577, 796, 955, 887, 923, 543, 463, 960, 375, 400, 1085, 587, 1051, 543, 1151, 667, 476, 559, 419, 674, 407, 566, 579, 496, 557, 676, 704, 401, 473, 330, 638, 498, 513, 813, 810, 474, 541, 705, 651, 506, 598, 406, 601, 399, 404]
In [16]:
# Plot the training history currently held by car1.
# NOTE(review): presumably the per-episode step counts loaded/recorded above --
# confirm in mcpg.mountain_car.plot_training.
car1.plot_training()
In [ ]:
# Scratch cell: read the first pickled object from the saved 'alpha_1e-2' run
# file and store it as the agent's parameters.
#
# The original lines assigned to `self.theta`, but `self` does not exist at
# notebook scope; the snippet looks lifted from a method of the agent class.
# NOTE(review): assumes the agent instance `car1` keeps its weights in a
# `theta` attribute -- confirm against mcpg.mountain_car.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# your own training runs.
with open('alpha_1e-2_feature2_schedule_50', 'rb') as pkl_file:
    # The context manager closes the file even if load() raises.
    car1.theta = cPickle.load(pkl_file)
Content source: febert/DeepRL
Similar notebooks: