In [1]:
import tensorflow as tf
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from collections import deque


Using TensorFlow backend.

In [2]:
env = gym.make('CartPole-v0')


[2017-07-11 23:09:36,751] Making new env: CartPole-v0

In [4]:
state = env.reset()
for i in range(500):
    next_state, reward, done, _ = env.step(env.action_space.sample())
    # print(reward)
    if done:
        print(i)
        break


10

In [42]:
lr = 0.001
mem_size = 2000
episodes = 2000
steps = 1000

epsilon = 1
epsilon_min = 0.01
epsilon_decay = 0.0001

gamma = 0.99
batch_size = 32

In [43]:
bot = Sequential()
bot.add(Dense(units=24, input_dim=4, activation='relu'))
bot.add(Dense(units=24, activation='relu'))
bot.add(Dense(units=2, activation='linear'))
bot.compile(loss='mse', optimizer=Adam(lr))

memory = deque(maxlen=mem_size)

In [44]:
env.reset()
state, reward, done, _ = env.step(env.action_space.sample())
for i in range(batch_size):
    action = env.action_space.sample()
    next_state, reward, done, _ = env.step(action)
    
    if done:
        next_state = np.zeros(state.shape)
        memory.append((state, action, next_state, reward, done))
        env.reset()
        state, reward, done, _ = env.step(env.action_space.sample())
    else:
        memory.append((state, action, next_state, reward, done))
        state = next_state
len(memory)


Out[44]:
32

In [46]:
reward_list = []
step = 0
for episode in range(episodes):
    state = env.reset()
    total_rewards = 0
    t = 0 
    while t < steps:
        step = step + 1
        explore = epsilon_min + (epsilon - epsilon_min) * np.exp(-epsilon_decay * step)
        if explore >= np.random.rand():
            action = env.action_space.sample()
        else:
            action = np.argmax(bot.predict(np.reshape(state, (1, 4))))
        next_state, reward, done, _ = env.step(action)
        total_rewards = total_rewards + reward
        # reward = reward if not done else -10
        if done:
            reward_list.append(total_rewards)
            next_state = np.zeros(state.shape)
            memory.append((state, action, next_state, reward, done))
            print('episode:{} steps:{} rewards:{} explore:{}'.format(episode + 1, t + 1, total_rewards, explore))
            env.reset()
            state, reward, done, _ = env.step(env.action_space.sample())
            t = steps
        else:
            memory.append((state, action, next_state, reward, done))
            state = next_state
            t = t+1
            
    
        minibatch = [memory[ii] for ii in np.random.choice(range(len(memory)), batch_size)]
        states = [each[0] for each in minibatch]
        actions = [each[1] for each in minibatch]
        next_states = [each[2] for each in minibatch]
        rewards = [each[3] for each in minibatch]
        dones = [each[4] for each in minibatch]
        
        next_states = np.reshape(next_states, (-1, 4))
        states = np.reshape(states, (-1, 4))
        actions = np.reshape(actions, (batch_size, 1))
        targetQs = bot.predict(next_states)
        episode_ends = (next_states == np.zeros(states[0].shape)).all(axis=1)
        targetQs[episode_ends] = (0, 0)
        
        targets = rewards + gamma * np.max(targetQs, axis=1)
        targets_f = bot.predict(states)
        
        for pos in range(len(actions)):
            targets_f[pos,actions[pos]] = targets[pos]
        
        bot.fit(states, targets_f, epochs=1, verbose=0)


episode:1 steps:10 rewards:10.0 explore:0.9990104948350412
episode:2 steps:12 rewards:12.0 explore:0.9978243940440459
episode:3 steps:10 rewards:10.0 explore:0.9968370633976026
episode:4 steps:12 rewards:12.0 explore:0.9956535691600873
episode:5 steps:16 rewards:16.0 explore:0.9940777844133959
episode:6 steps:23 rewards:23.0 explore:0.991817006400586
episode:7 steps:21 rewards:21.0 explore:0.9897573540790046
episode:8 steps:10 rewards:10.0 explore:0.9887780864403506
episode:9 steps:17 rewards:17.0 explore:0.9871155772266214
episode:10 steps:14 rewards:14.0 explore:0.9857485725450585
episode:11 steps:12 rewards:12.0 explore:0.9845783765160454
episode:12 steps:25 rewards:25.0 explore:0.9821449735958028
episode:13 steps:13 rewards:13.0 explore:0.9808820062367796
episode:14 steps:15 rewards:15.0 explore:0.979426774923765
episode:15 steps:13 rewards:13.0 explore:0.9781673389271324
episode:16 steps:35 rewards:35.0 explore:0.9747846763535254
episode:17 steps:16 rewards:16.0 explore:0.9732422551373824
episode:18 steps:24 rewards:24.0 explore:0.9709332456447684
episode:19 steps:33 rewards:33.0 explore:0.9677673924648986
episode:20 steps:13 rewards:13.0 explore:0.9665231038175522
episode:21 steps:14 rewards:14.0 explore:0.9651849084275527
episode:22 steps:16 rewards:16.0 explore:0.9636578345589392
episode:23 steps:20 rewards:20.0 explore:0.9617524249345822
episode:24 steps:14 rewards:14.0 explore:0.9604209038219343
episode:25 steps:24 rewards:24.0 explore:0.9581426286765082
episode:26 steps:14 rewards:14.0 explore:0.9568161577426717
episode:27 steps:15 rewards:15.0 explore:0.9553969981418508
episode:28 steps:19 rewards:19.0 explore:0.9536024492067297
episode:29 steps:15 rewards:15.0 explore:0.9521881065550976
episode:30 steps:12 rewards:12.0 explore:0.9510581589313993
episode:31 steps:18 rewards:18.0 explore:0.9493657778452432
episode:32 steps:13 rewards:13.0 explore:0.948145395754274
episode:33 steps:17 rewards:17.0 explore:0.9465519034337303
episode:34 steps:13 rewards:13.0 explore:0.9453351770028022
episode:35 steps:30 rewards:30.0 explore:0.9425333762742368
episode:36 steps:38 rewards:38.0 explore:0.9389964738151386
episode:37 steps:13 rewards:13.0 explore:0.9377895630611424
episode:38 steps:18 rewards:18.0 explore:0.9361210439653186
episode:39 steps:17 rewards:17.0 explore:0.9345479756774695
episode:40 steps:20 rewards:20.0 explore:0.9327007275899515
episode:41 steps:28 rewards:28.0 explore:0.9301207791660591
episode:42 steps:35 rewards:35.0 explore:0.9269059856094697
episode:43 steps:14 rewards:14.0 explore:0.9256232153782973
episode:44 steps:17 rewards:17.0 explore:0.9240679782382762
episode:45 steps:18 rewards:18.0 explore:0.9224241357794977
episode:46 steps:14 rewards:14.0 explore:0.9211476357479235
episode:47 steps:57 rewards:57.0 explore:0.9159688677345054
episode:48 steps:32 rewards:32.0 explore:0.9130744009745154
episode:49 steps:15 rewards:15.0 explore:0.9117208048239659
episode:50 steps:22 rewards:22.0 explore:0.9097391996183267
episode:51 steps:20 rewards:20.0 explore:0.9079415194984367
episode:52 steps:12 rewards:12.0 explore:0.9068646359344029
episode:53 steps:28 rewards:28.0 explore:0.9043569273841263
episode:54 steps:23 rewards:23.0 explore:0.9023022702126513
episode:55 steps:25 rewards:25.0 explore:0.9000743006594618
episode:56 steps:38 rewards:38.0 explore:0.8966984365211077
episode:57 steps:25 rewards:25.0 explore:0.894484459054751
episode:58 steps:21 rewards:21.0 explore:0.892628990614483
episode:59 steps:12 rewards:12.0 explore:0.8915704710644979
episode:60 steps:30 rewards:30.0 explore:0.8889297227543306
episode:61 steps:58 rewards:58.0 explore:0.8838466854200648
episode:62 steps:25 rewards:25.0 explore:0.8816647972031857
episode:63 steps:16 rewards:16.0 explore:0.8802712486637826
episode:64 steps:28 rewards:28.0 explore:0.8778378974490139
episode:65 steps:10 rewards:10.0 explore:0.87697049332591
episode:66 steps:36 rewards:36.0 explore:0.873855010783234
episode:67 steps:20 rewards:20.0 explore:0.872129027320458
episode:68 steps:16 rewards:16.0 explore:0.870750723813589
episode:69 steps:19 rewards:19.0 explore:0.8691168501098853
episode:70 steps:120 rewards:120.0 explore:0.8588690576366207
episode:71 steps:46 rewards:46.0 explore:0.8549732272510244
episode:72 steps:36 rewards:36.0 explore:0.8519367924948307
episode:73 steps:17 rewards:17.0 explore:0.8505067158571417
episode:74 steps:16 rewards:16.0 explore:0.8491629803868099
episode:75 steps:15 rewards:15.0 explore:0.8479051795027305
episode:76 steps:18 rewards:18.0 explore:0.8463983067719388
episode:77 steps:16 rewards:16.0 explore:0.8450611395001836
episode:78 steps:33 rewards:33.0 explore:0.8423099796502623
episode:79 steps:86 rewards:86.0 explore:0.8351828046050659
episode:80 steps:45 rewards:45.0 explore:0.8314778244418622
episode:81 steps:55 rewards:55.0 explore:0.8269720985119177
episode:82 steps:19 rewards:19.0 explore:0.8254213252258911
episode:83 steps:17 rewards:17.0 explore:0.8240362865894115
episode:84 steps:43 rewards:43.0 explore:0.820543445547202
episode:85 steps:32 rewards:32.0 explore:0.8179538520807833
episode:86 steps:47 rewards:47.0 explore:0.814165378862013
episode:87 steps:25 rewards:25.0 explore:0.8121574763387944
episode:88 steps:20 rewards:20.0 explore:0.8105547646320608
episode:89 steps:56 rewards:56.0 explore:0.8060841872498941
episode:90 steps:34 rewards:34.0 explore:0.8033820971693942
episode:91 steps:20 rewards:20.0 explore:0.8017969186819356
episode:92 steps:38 rewards:38.0 explore:0.7987937999303244
episode:93 steps:14 rewards:14.0 explore:0.7976902612677303
episode:94 steps:35 rewards:35.0 explore:0.794938164332362
episode:95 steps:20 rewards:20.0 explore:0.7933698568339648
episode:96 steps:22 rewards:22.0 explore:0.7916483375145275
episode:97 steps:37 rewards:37.0 explore:0.7887615824558878
episode:98 steps:23 rewards:23.0 explore:0.7869724890623339
episode:99 steps:14 rewards:14.0 explore:0.7858854886554748
episode:100 steps:15 rewards:15.0 explore:0.7847225328573942
episode:101 steps:36 rewards:36.0 explore:0.7819385459222962
episode:102 steps:33 rewards:33.0 explore:0.779395347306421
episode:103 steps:30 rewards:30.0 explore:0.7770906200838809
episode:104 steps:26 rewards:26.0 explore:0.7750987749923544
episode:105 steps:44 rewards:44.0 explore:0.7717397356881056
episode:106 steps:47 rewards:47.0 explore:0.7681679591802079
episode:107 steps:24 rewards:24.0 explore:0.7663505378561265
episode:108 steps:62 rewards:62.0 explore:0.7616756715820135
episode:109 steps:32 rewards:32.0 explore:0.7592741539105202
episode:110 steps:12 rewards:12.0 explore:0.7583755641874922
episode:111 steps:28 rewards:28.0 explore:0.7562830435038377
episode:112 steps:19 rewards:19.0 explore:0.7548664519093531
episode:113 steps:82 rewards:82.0 explore:0.7487835211046464
episode:114 steps:45 rewards:45.0 explore:0.7454664642351635
episode:115 steps:20 rewards:20.0 explore:0.7439970012594898
episode:116 steps:14 rewards:14.0 explore:0.7429701244392238
episode:117 steps:14 rewards:14.0 explore:0.7419446842406366
episode:118 steps:27 rewards:27.0 explore:0.7399710991320363
episode:119 steps:16 rewards:16.0 explore:0.7388040792383043
episode:120 steps:43 rewards:43.0 explore:0.7356769498441607
episode:121 steps:32 rewards:32.0 explore:0.7333584951106472
episode:122 steps:31 rewards:31.0 explore:0.7311195559245596
episode:123 steps:31 rewards:31.0 explore:0.7288875467029541
episode:124 steps:22 rewards:22.0 explore:0.7273077325329861
episode:125 steps:50 rewards:50.0 explore:0.723730145291728
episode:126 steps:27 rewards:27.0 explore:0.7218056731060077
episode:127 steps:45 rewards:45.0 explore:0.7186097438110731
episode:128 steps:67 rewards:67.0 explore:0.7138779278120575
episode:129 steps:41 rewards:41.0 explore:0.7109979363249636
episode:130 steps:24 rewards:24.0 explore:0.7093175585377096
episode:131 steps:41 rewards:41.0 explore:0.7064562262870673
episode:132 steps:44 rewards:44.0 explore:0.7033985507107204
episode:133 steps:14 rewards:14.0 explore:0.7024284719533018
episode:134 steps:34 rewards:34.0 explore:0.7000782128532133
episode:135 steps:47 rewards:47.0 explore:0.6968424552396832
episode:136 steps:60 rewards:60.0 explore:0.6927337389831562
episode:137 steps:35 rewards:35.0 explore:0.6903483477664306
episode:138 steps:29 rewards:29.0 explore:0.6883781956592117
episode:139 steps:15 rewards:15.0 explore:0.6873613911597483
episode:140 steps:14 rewards:14.0 explore:0.6864137487166164
episode:141 steps:27 rewards:27.0 explore:0.6845898949057172
episode:142 steps:35 rewards:35.0 explore:0.6822329573203615
episode:143 steps:27 rewards:27.0 explore:0.6804203764209535
episode:144 steps:21 rewards:21.0 explore:0.6790139708731487
episode:145 steps:14 rewards:14.0 explore:0.6780780066417624
episode:146 steps:172 rewards:172.0 explore:0.666685322874171
episode:147 steps:25 rewards:25.0 explore:0.6650456599995699
episode:148 steps:22 rewards:22.0 explore:0.6636061435962195
episode:149 steps:15 rewards:15.0 explore:0.662626469320221
episode:150 steps:24 rewards:24.0 explore:0.6610620438553345
episode:151 steps:59 rewards:59.0 explore:0.6572320872785489
episode:152 steps:37 rewards:37.0 explore:0.6548417534002648
episode:153 steps:13 rewards:13.0 explore:0.6540040037760834
episode:154 steps:18 rewards:18.0 explore:0.6528458392300822
episode:155 steps:109 rewards:109.0 explore:0.6458768694661592
episode:156 steps:25 rewards:25.0 explore:0.644289162752816
episode:157 steps:94 rewards:94.0 explore:0.6383547799190329
episode:158 steps:86 rewards:86.0 explore:0.6329740989029881
episode:159 steps:74 rewards:74.0 explore:0.6283811056056463
episode:160 steps:99 rewards:99.0 explore:0.6222893366708193
episode:161 steps:154 rewards:154.0 explore:0.6129323148790947
episode:162 steps:67 rewards:67.0 explore:0.6089061710124799
episode:163 steps:70 rewards:70.0 explore:0.6047284668389445
episode:164 steps:111 rewards:111.0 explore:0.598163483918067
episode:165 steps:120 rewards:120.0 explore:0.5911477009977649
episode:166 steps:82 rewards:82.0 explore:0.5864017747402162
episode:167 steps:17 rewards:17.0 explore:0.585422724151946
episode:168 steps:105 rewards:105.0 explore:0.5794123949962157
episode:169 steps:102 rewards:102.0 explore:0.5736339089455001
episode:170 steps:141 rewards:141.0 explore:0.5657424364519852
episode:171 steps:159 rewards:159.0 explore:0.5569760094926471
episode:172 steps:80 rewards:80.0 explore:0.5526176580669251
episode:173 steps:85 rewards:85.0 explore:0.5480249546148961
episode:174 steps:116 rewards:116.0 explore:0.5418199238984404
episode:175 steps:36 rewards:36.0 explore:0.5399088142338003
episode:176 steps:177 rewards:177.0 explore:0.530611948202898
episode:177 steps:178 rewards:178.0 explore:0.5214270436859028
episode:178 steps:27 rewards:27.0 explore:0.5200480531429205
episode:179 steps:109 rewards:109.0 explore:0.5145187189797594
episode:180 steps:104 rewards:104.0 explore:0.5092989143342809
episode:181 steps:113 rewards:113.0 explore:0.5036885946071121
episode:182 steps:81 rewards:81.0 explore:0.4997058688058192
episode:183 steps:93 rewards:93.0 explore:0.4951727160588499
episode:184 steps:117 rewards:117.0 explore:0.489529273795622
episode:185 steps:150 rewards:150.0 explore:0.48239001300525325
episode:186 steps:82 rewards:82.0 explore:0.47853225332956967
episode:187 steps:141 rewards:141.0 explore:0.4719723048758076
episode:188 steps:176 rewards:176.0 explore:0.4639127246594539
episode:189 steps:93 rewards:93.0 explore:0.45971090506580253
episode:190 steps:182 rewards:182.0 explore:0.4516001979100396
episode:191 steps:87 rewards:87.0 explore:0.4477749401870996
episode:192 steps:135 rewards:135.0 explore:0.44190469182514325
episode:193 steps:58 rewards:58.0 explore:0.43940689522485077
episode:194 steps:49 rewards:49.0 explore:0.43730794805844847
episode:195 steps:177 rewards:177.0 explore:0.4298111398523363
episode:196 steps:134 rewards:134.0 explore:0.42422319343337667
episode:197 steps:28 rewards:28.0 explore:0.4230649907322372
episode:198 steps:177 rewards:177.0 explore:0.41581806488812556
episode:199 steps:102 rewards:102.0 explore:0.4116997596884293
episode:200 steps:153 rewards:153.0 explore:0.40560053144151603
episode:201 steps:139 rewards:139.0 explore:0.400139724585387
episode:202 steps:180 rewards:180.0 explore:0.3931800346627668
episode:203 steps:145 rewards:145.0 explore:0.38766401196965733
episode:204 steps:135 rewards:135.0 explore:0.38259980809657296
episode:205 steps:94 rewards:94.0 explore:0.37911377990173073
episode:206 steps:152 rewards:152.0 explore:0.37354567524676435
episode:207 steps:168 rewards:168.0 explore:0.3674891253709585
episode:208 steps:151 rewards:151.0 explore:0.36213159076139456
episode:209 steps:116 rewards:116.0 explore:0.35807046438023477
episode:210 steps:115 rewards:115.0 explore:0.354090582223783
episode:211 steps:160 rewards:160.0 explore:0.34862894254015425
episode:212 steps:152 rewards:152.0 explore:0.3435207035798906
episode:213 steps:162 rewards:162.0 explore:0.33816119739389056
episode:214 steps:139 rewards:139.0 explore:0.3336313123856117
episode:215 steps:140 rewards:140.0 explore:0.3291320423900188
episode:216 steps:166 rewards:166.0 explore:0.32387817820476267
episode:217 steps:159 rewards:159.0 explore:0.3189269814940543
episode:218 steps:165 rewards:165.0 explore:0.3138715086459193
episode:219 steps:142 rewards:142.0 explore:0.30958702505004204
episode:220 steps:136 rewards:136.0 explore:0.3055402221436548
episode:221 steps:153 rewards:153.0 explore:0.3010528725062148
episode:222 steps:153 rewards:153.0 explore:0.29663365676480624
episode:223 steps:155 rewards:155.0 explore:0.292225089742209
episode:224 steps:162 rewards:162.0 explore:0.28768987769065657
episode:225 steps:148 rewards:148.0 explore:0.283610330614261
episode:226 steps:150 rewards:150.0 explore:0.2795368034868498
episode:227 steps:146 rewards:146.0 explore:0.2756301540913975
episode:228 steps:142 rewards:142.0 explore:0.27188486042156196
episode:229 steps:161 rewards:161.0 explore:0.2677022743337365
episode:230 steps:187 rewards:187.0 explore:0.2629280202047348
episode:231 steps:178 rewards:178.0 explore:0.2584657336143629
episode:232 steps:15 rewards:15.0 explore:0.2580933143981821
episode:233 steps:174 rewards:174.0 explore:0.2538138302109242
episode:234 steps:157 rewards:157.0 explore:0.25001584527183957
episode:235 steps:10 rewards:10.0 explore:0.24977594939449774
episode:236 steps:12 rewards:12.0 explore:0.24948839082487317
episode:237 steps:18 rewards:18.0 explore:0.24905769945990353
episode:238 steps:12 rewards:12.0 explore:0.24877100227326726
episode:239 steps:17 rewards:17.0 explore:0.2483654363980704
episode:240 steps:9 rewards:9.0 explore:0.248151004014359
episode:241 steps:10 rewards:10.0 explore:0.24791297204616472
episode:242 steps:17 rewards:17.0 explore:0.2475088635832026
episode:243 steps:31 rewards:31.0 explore:0.24677372615782642
episode:244 steps:51 rewards:51.0 explore:0.24556925416868575
episode:245 steps:11 rewards:11.0 explore:0.2453102704562562
episode:246 steps:12 rewards:12.0 explore:0.24502806748735437
episode:247 steps:13 rewards:13.0 explore:0.2447227295123064
episode:248 steps:11 rewards:11.0 explore:0.24446467646503917
episode:249 steps:12 rewards:12.0 explore:0.24418348760034264
episode:250 steps:10 rewards:10.0 explore:0.24394942116546525
episode:251 steps:11 rewards:11.0 explore:0.24369221828969956
episode:252 steps:14 rewards:14.0 explore:0.24336527809563002
episode:253 steps:10 rewards:10.0 explore:0.24313202946128895
episode:254 steps:12 rewards:12.0 explore:0.24285243881387475
episode:255 steps:15 rewards:15.0 explore:0.2425034219837172
episode:256 steps:200 rewards:200.0 explore:0.2378995457677203
episode:257 steps:200 rewards:200.0 explore:0.23338683240873156
episode:258 steps:200 rewards:200.0 explore:0.22896347676123718
episode:259 steps:187 rewards:187.0 explore:0.22490690688527995
episode:260 steps:200 rewards:200.0 explore:0.22065146501340982
episode:261 steps:191 rewards:191.0 explore:0.2166662024437604
episode:262 steps:197 rewards:197.0 explore:0.21263471875069387
episode:263 steps:182 rewards:182.0 explore:0.2089801245550366
episode:264 steps:200 rewards:200.0 explore:0.20504005410325982
episode:265 steps:200 rewards:200.0 explore:0.20117800227369304
episode:266 steps:200 rewards:200.0 explore:0.19739242419410977
episode:267 steps:200 rewards:200.0 explore:0.19368180558280307
episode:268 steps:200 rewards:200.0 explore:0.19004466214285295
episode:269 steps:200 rewards:200.0 explore:0.18647953896838743
episode:270 steps:200 rewards:200.0 explore:0.1829850099626012
episode:271 steps:200 rewards:200.0 explore:0.17955967726729755
episode:272 steps:200 rewards:200.0 explore:0.17620217070372668
episode:273 steps:200 rewards:200.0 explore:0.17291114722449577
episode:274 steps:200 rewards:200.0 explore:0.16968529037633223
episode:275 steps:200 rewards:200.0 explore:0.1665233097734848
episode:276 steps:200 rewards:200.0 explore:0.16342394058155213
episode:277 steps:200 rewards:200.0 explore:0.16038594301153183
episode:278 steps:200 rewards:200.0 explore:0.1574081018238888
episode:279 steps:200 rewards:200.0 explore:0.1544892258424429
episode:280 steps:200 rewards:200.0 explore:0.1516281474778827
episode:281 steps:200 rewards:200.0 explore:0.14882372226071408
episode:282 steps:200 rewards:200.0 explore:0.1460748283834574
episode:283 steps:200 rewards:200.0 explore:0.14338036625190936
episode:284 steps:200 rewards:200.0 explore:0.14073925804529067
episode:285 steps:200 rewards:200.0 explore:0.13815044728510345
episode:286 steps:200 rewards:200.0 explore:0.1356128984125257
episode:287 steps:200 rewards:200.0 explore:0.1331255963741739
episode:288 steps:200 rewards:200.0 explore:0.1306875462160683
episode:289 steps:188 rewards:188.0 explore:0.12843981522133696
episode:290 steps:196 rewards:196.0 explore:0.12614099685535285
episode:291 steps:183 rewards:183.0 explore:0.12403494575476331
episode:292 steps:200 rewards:200.0 explore:0.1217769025394268
episode:293 steps:176 rewards:176.0 explore:0.11982683994293791
episode:294 steps:200 rewards:200.0 explore:0.1176521228055411
episode:295 steps:200 rewards:200.0 explore:0.11552046795264734
episode:296 steps:196 rewards:196.0 explore:0.11347240337855564
episode:297 steps:178 rewards:178.0 explore:0.11164688986804804
episode:298 steps:183 rewards:183.0 explore:0.10980366869658935
episode:299 steps:200 rewards:200.0 explore:0.10782742364754383
episode:300 steps:200 rewards:200.0 explore:0.10589031087234035
episode:301 steps:184 rewards:184.0 explore:0.10414206236222205
episode:302 steps:200 rewards:200.0 explore:0.10227792462981188
episode:303 steps:178 rewards:178.0 explore:0.10064990988739762
episode:304 steps:200 rewards:200.0 explore:0.09885492140700405
episode:305 steps:155 rewards:155.0 explore:0.09748828888822834
episode:306 steps:169 rewards:169.0 explore:0.09602216048592836
episode:307 steps:200 rewards:200.0 explore:0.09431880758328776
episode:308 steps:168 rewards:168.0 explore:0.09291408433015039
episode:309 steps:157 rewards:157.0 explore:0.09162249868362875
episode:310 steps:82 rewards:82.0 explore:0.09095593085750762
episode:311 steps:191 rewards:191.0 explore:0.0894243457768294
episode:312 steps:44 rewards:44.0 explore:0.0890756463567038
episode:313 steps:9 rewards:9.0 explore:0.08900451029101404
episode:314 steps:9 rewards:9.0 explore:0.08893343821898189
episode:315 steps:9 rewards:9.0 explore:0.08886243008303903
episode:316 steps:16 rewards:16.0 explore:0.08873635108500146
episode:317 steps:10 rewards:10.0 explore:0.08865765408897255
episode:318 steps:48 rewards:48.0 explore:0.0882810020374408
episode:319 steps:9 rewards:9.0 explore:0.08821058082990395
episode:320 steps:9 rewards:9.0 explore:0.08814022297294181
episode:321 steps:11 rewards:11.0 explore:0.08805431598517714
episode:322 steps:9 rewards:9.0 explore:0.087984098703307
episode:323 steps:9 rewards:9.0 explore:0.08791394458856105
episode:324 steps:8 rewards:8.0 explore:0.08785163835870514
episode:325 steps:9 rewards:9.0 explore:0.087781603404639
episode:326 steps:11 rewards:11.0 explore:0.08769609068151414
episode:327 steps:8 rewards:8.0 explore:0.0876339586650892
episode:328 steps:9 rewards:9.0 explore:0.08756411953461347
episode:329 steps:12 rewards:12.0 explore:0.08747109841500625
episode:330 steps:12 rewards:12.0 explore:0.08737818885379413
episode:331 steps:9 rewards:9.0 explore:0.08730857981259285
episode:332 steps:200 rewards:200.0 explore:0.08577776736753293
episode:333 steps:176 rewards:176.0 explore:0.08445574657046587
episode:334 steps:132 rewards:132.0 explore:0.08347938885334161
episode:335 steps:178 rewards:178.0 explore:0.08218302757520575
episode:336 steps:160 rewards:160.0 explore:0.08103728948106396
episode:337 steps:193 rewards:193.0 explore:0.07967941542797229
episode:338 steps:150 rewards:150.0 explore:0.07864202408265734
episode:339 steps:152 rewards:152.0 explore:0.07760655481916162
episode:340 steps:200 rewards:200.0 explore:0.07626785534058264
episode:341 steps:200 rewards:200.0 explore:0.0749556638877231
episode:342 steps:200 rewards:200.0 explore:0.07366945556650568
episode:343 steps:200 rewards:200.0 explore:0.07240871587645228
episode:344 steps:198 rewards:198.0 explore:0.07118517631651812
episode:345 steps:200 rewards:200.0 explore:0.06997362865149095
episode:346 steps:200 rewards:200.0 explore:0.06878607123758344
episode:347 steps:200 rewards:200.0 explore:0.0676220290359957
episode:348 steps:200 rewards:200.0 explore:0.06648103641432632
episode:349 steps:200 rewards:200.0 explore:0.06536263696031322
episode:350 steps:200 rewards:200.0 explore:0.06426638329926256
episode:351 steps:200 rewards:200.0 explore:0.06319183691509302
episode:352 steps:200 rewards:200.0 explore:0.06213856797492346
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-46-ac7c3b96a6ab> in <module>()
     11             action = env.action_space.sample()
     12         else:
---> 13             action = np.argmax(bot.predict(np.reshape(state, (1, 4))))
     14         next_state, reward, done, _ = env.step(action)
     15         total_rewards = total_rewards + reward

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\keras\models.py in predict(self, x, batch_size, verbose)
    914         if self.model is None:
    915             self.build()
--> 916         return self.model.predict(x, batch_size=batch_size, verbose=verbose)
    917 
    918     def predict_on_batch(self, x):

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\keras\engine\training.py in predict(self, x, batch_size, verbose)
   1502         f = self.predict_function
   1503         return self._predict_loop(f, ins,
-> 1504                                   batch_size=batch_size, verbose=verbose)
   1505 
   1506     def train_on_batch(self, x, y,

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\keras\engine\training.py in _predict_loop(self, f, ins, batch_size, verbose)
   1126                 ins_batch = _slice_arrays(ins, batch_ids)
   1127 
-> 1128             batch_outs = f(ins_batch)
   1129             if not isinstance(batch_outs, list):
   1130                 batch_outs = [batch_outs]

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
   2265         updated = session.run(self.outputs + [self.updates_op],
   2266                               feed_dict=feed_dict,
-> 2267                               **self.session_kwargs)
   2268         return updated[:len(self.outputs)]
   2269 

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    787     try:
    788       result = self._run(None, fetches, feed_dict, options_ptr,
--> 789                          run_metadata_ptr)
    790       if run_metadata:
    791         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    995     if final_fetches or final_targets:
    996       results = self._do_run(handle, final_targets, final_fetches,
--> 997                              feed_dict_string, options, run_metadata)
    998     else:
    999       results = []

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1130     if handle is None:
   1131       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132                            target_list, options, run_metadata)
   1133     else:
   1134       return self._do_call(_prun_fn, self._session, handle, feed_dict,

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1137   def _do_call(self, fn, *args):
   1138     try:
-> 1139       return fn(*args)
   1140     except errors.OpError as e:
   1141       message = compat.as_text(e.message)

C:\Users\Abdul\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1119         return tf_session.TF_Run(session, options,
   1120                                  feed_dict, fetch_list, target_list,
-> 1121                                  status, run_metadata)
   1122 
   1123     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [47]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(reward_list)


Out[47]:
[<matplotlib.lines.Line2D at 0x1e5e9133048>]

In [ ]: