In [1]:
import os
import sys
# Expose the directory two levels above the working directory to the import
# system (presumably the repository root that holds the `deeprl` package --
# confirm against the project layout).
sys.path.append(os.path.abspath(os.path.join(os.pardir, os.pardir)))
In [2]:
import gym
import numpy as np
from keras.layers import Activation, Dense
from keras.models import Sequential
from deeprl.agents.dqn import DQNAgent
from deeprl.core import Processor
Using TensorFlow backend.
In [3]:
# Fixed seed so that a fresh Restart & Run All starts from the same NumPy and
# environment random state. NOTE(review): TensorFlow's own RNG is not seeded
# here, so runs may still differ -- seed the backend too if exact
# reproducibility is required.
SEED = 0
np.random.seed(SEED)

env = gym.make('CartPole-v1')
# Classic Gym API (`Env.seed` was removed in Gym 0.26); seeds the
# environment's internal random number generator.
env.seed(SEED)

# Network dimensions are read from the environment instead of being hard-coded.
state_size = env.observation_space.shape[0]  # length of the observation vector
action_size = env.action_space.n             # number of discrete actions
[2017-10-01 03:09:00,252] Making new env: CartPole-v1
In [4]:
# Fully connected network: two hidden ELU layers of 32 units each and a linear
# output layer with one unit per action (presumably read as per-action
# Q-values by DQNAgent -- confirm against the agent implementation).
model = Sequential([
    Dense(32, input_dim=state_size, activation='elu'),
    Dense(32, activation='elu'),
    Dense(action_size, activation='linear'),
])
model.compile(optimizer='adam', loss='mse')
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 32) 160
_________________________________________________________________
dense_2 (Dense) (None, 32) 1056
_________________________________________________________________
dense_3 (Dense) (None, 2) 66
=================================================================
Total params: 1,282
Trainable params: 1,282
Non-trainable params: 0
_________________________________________________________________
In [5]:
class CartpoleProcessor(Processor):
    """Processor that turns a raw environment observation into a one-row batch."""

    def process_observation(self, observation):
        """Reshape an observation into a 2-D array with a single row.

        The number of columns is inferred with ``-1`` rather than being
        hard-coded to 4, so the processor keeps working if the observation
        size changes; CartPole's 4-element observation still becomes (1, 4).

        Args:
            observation: array-like object exposing ``reshape`` (a NumPy
                array in practice -- assumes this is what the environment
                returns; confirm against the agent's call site).

        Returns:
            The observation reshaped to ``(1, n_features)``.
        """
        return observation.reshape((1, -1))
In [6]:
# Wrap the compiled Keras model in a DQN agent.
agent = DQNAgent(
    model,
    # Reshapes every observation via CartpoleProcessor.process_observation.
    processor=CartpoleProcessor(),
    # NOTE(review): presumably the number of stored experiences required
    # before learning starts -- confirm against DQNAgent.
    min_experiences=1000,
    # NOTE(review): presumably the replay-memory capacity -- confirm.
    memory_limit=100000
)
In [7]:
# Train for 200 episodes; with verbose=1 the agent logs one line per episode
# showing episode_reward and avg_reward.
# NOTE(review): n_simulations presumably sets how many evaluation rollouts
# are averaged into avg_reward -- confirm against DQNAgent.train.
history = agent.train(
    env,
    n_episodes=200,
    n_simulations=10,
    min_experiences=agent.min_experiences,
    verbose=1
)
Starting agent initialization...
Completed agent initialization in 1.80256009102 sec.
Starting agent training...
episode=1/200: episode_reward=12.0, avg_reward=42.6
episode=2/200: episode_reward=10.0, avg_reward=40.1
episode=3/200: episode_reward=43.0, avg_reward=34.9
episode=4/200: episode_reward=21.0, avg_reward=21.8
episode=5/200: episode_reward=22.0, avg_reward=23.3
episode=6/200: episode_reward=19.0, avg_reward=20.9
episode=7/200: episode_reward=39.0, avg_reward=43.7
episode=8/200: episode_reward=49.0, avg_reward=53.1
episode=9/200: episode_reward=95.0, avg_reward=49.7
episode=10/200: episode_reward=23.0, avg_reward=49.1
episode=11/200: episode_reward=86.0, avg_reward=38.1
episode=12/200: episode_reward=39.0, avg_reward=40.0
episode=13/200: episode_reward=61.0, avg_reward=58.1
episode=14/200: episode_reward=36.0, avg_reward=45.4
episode=15/200: episode_reward=57.0, avg_reward=45.0
episode=16/200: episode_reward=54.0, avg_reward=67.3
episode=17/200: episode_reward=45.0, avg_reward=39.2
episode=18/200: episode_reward=59.0, avg_reward=61.0
episode=19/200: episode_reward=44.0, avg_reward=58.9
episode=20/200: episode_reward=60.0, avg_reward=51.2
episode=21/200: episode_reward=45.0, avg_reward=53.4
episode=22/200: episode_reward=66.0, avg_reward=48.1
episode=23/200: episode_reward=29.0, avg_reward=49.2
episode=24/200: episode_reward=96.0, avg_reward=56.8
episode=25/200: episode_reward=44.0, avg_reward=40.6
episode=26/200: episode_reward=74.0, avg_reward=85.8
episode=27/200: episode_reward=117.0, avg_reward=65.9
episode=28/200: episode_reward=47.0, avg_reward=43.9
episode=29/200: episode_reward=36.0, avg_reward=61.3
episode=30/200: episode_reward=45.0, avg_reward=51.5
episode=31/200: episode_reward=56.0, avg_reward=42.9
episode=32/200: episode_reward=50.0, avg_reward=45.2
episode=33/200: episode_reward=34.0, avg_reward=60.3
episode=34/200: episode_reward=34.0, avg_reward=69.1
episode=35/200: episode_reward=65.0, avg_reward=67.0
episode=36/200: episode_reward=108.0, avg_reward=88.2
episode=37/200: episode_reward=99.0, avg_reward=159.4
episode=38/200: episode_reward=148.0, avg_reward=200.9
episode=39/200: episode_reward=219.0, avg_reward=239.7
episode=40/200: episode_reward=116.0, avg_reward=486.5
episode=41/200: episode_reward=303.0, avg_reward=500.0
episode=42/200: episode_reward=500.0, avg_reward=500.0
episode=43/200: episode_reward=500.0, avg_reward=500.0
episode=44/200: episode_reward=500.0, avg_reward=494.2
episode=45/200: episode_reward=485.0, avg_reward=500.0
episode=46/200: episode_reward=267.0, avg_reward=475.6
episode=47/200: episode_reward=354.0, avg_reward=493.6
episode=48/200: episode_reward=269.0, avg_reward=453.3
episode=49/200: episode_reward=439.0, avg_reward=443.2
episode=50/200: episode_reward=358.0, avg_reward=415.9
episode=51/200: episode_reward=382.0, avg_reward=273.2
episode=52/200: episode_reward=244.0, avg_reward=234.5
episode=53/200: episode_reward=241.0, avg_reward=436.1
episode=54/200: episode_reward=345.0, avg_reward=387.4
episode=55/200: episode_reward=269.0, avg_reward=376.6
episode=56/200: episode_reward=214.0, avg_reward=298.4
episode=57/200: episode_reward=244.0, avg_reward=371.7
episode=58/200: episode_reward=262.0, avg_reward=353.1
episode=59/200: episode_reward=362.0, avg_reward=335.5
episode=60/200: episode_reward=311.0, avg_reward=379.1
episode=61/200: episode_reward=399.0, avg_reward=289.9
episode=62/200: episode_reward=276.0, avg_reward=364.2
episode=63/200: episode_reward=292.0, avg_reward=272.8
episode=64/200: episode_reward=237.0, avg_reward=271.1
episode=65/200: episode_reward=230.0, avg_reward=323.4
episode=66/200: episode_reward=380.0, avg_reward=428.4
episode=67/200: episode_reward=294.0, avg_reward=355.1
episode=68/200: episode_reward=478.0, avg_reward=364.9
episode=69/200: episode_reward=277.0, avg_reward=349.6
episode=70/200: episode_reward=288.0, avg_reward=256.8
episode=71/200: episode_reward=256.0, avg_reward=256.8
episode=72/200: episode_reward=322.0, avg_reward=397.1
episode=73/200: episode_reward=305.0, avg_reward=384.0
episode=74/200: episode_reward=357.0, avg_reward=379.2
episode=75/200: episode_reward=304.0, avg_reward=440.9
episode=76/200: episode_reward=500.0, avg_reward=474.9
episode=77/200: episode_reward=352.0, avg_reward=473.8
episode=78/200: episode_reward=319.0, avg_reward=403.5
episode=79/200: episode_reward=334.0, avg_reward=414.4
episode=80/200: episode_reward=320.0, avg_reward=309.8
episode=81/200: episode_reward=353.0, avg_reward=396.5
episode=82/200: episode_reward=330.0, avg_reward=395.8
episode=83/200: episode_reward=249.0, avg_reward=247.0
episode=84/200: episode_reward=191.0, avg_reward=294.3
episode=85/200: episode_reward=210.0, avg_reward=449.3
episode=86/200: episode_reward=235.0, avg_reward=234.5
episode=87/200: episode_reward=284.0, avg_reward=269.0
episode=88/200: episode_reward=255.0, avg_reward=320.5
episode=89/200: episode_reward=219.0, avg_reward=238.1
episode=90/200: episode_reward=228.0, avg_reward=242.8
episode=91/200: episode_reward=258.0, avg_reward=216.6
episode=92/200: episode_reward=225.0, avg_reward=219.2
episode=93/200: episode_reward=258.0, avg_reward=215.2
episode=94/200: episode_reward=199.0, avg_reward=413.1
episode=95/200: episode_reward=423.0, avg_reward=247.1
episode=96/200: episode_reward=400.0, avg_reward=308.1
episode=97/200: episode_reward=209.0, avg_reward=330.9
episode=98/200: episode_reward=328.0, avg_reward=314.6
episode=99/200: episode_reward=271.0, avg_reward=258.2
episode=100/200: episode_reward=294.0, avg_reward=216.2
episode=101/200: episode_reward=212.0, avg_reward=202.4
episode=102/200: episode_reward=223.0, avg_reward=237.4
episode=103/200: episode_reward=263.0, avg_reward=271.2
episode=104/200: episode_reward=224.0, avg_reward=355.7
episode=105/200: episode_reward=282.0, avg_reward=276.2
episode=106/200: episode_reward=253.0, avg_reward=305.8
episode=107/200: episode_reward=270.0, avg_reward=231.1
episode=108/200: episode_reward=239.0, avg_reward=235.8
episode=109/200: episode_reward=245.0, avg_reward=306.1
episode=110/200: episode_reward=254.0, avg_reward=374.5
episode=111/200: episode_reward=267.0, avg_reward=302.0
episode=112/200: episode_reward=231.0, avg_reward=320.5
episode=113/200: episode_reward=321.0, avg_reward=313.6
episode=114/200: episode_reward=232.0, avg_reward=259.7
episode=115/200: episode_reward=286.0, avg_reward=240.2
episode=116/200: episode_reward=216.0, avg_reward=320.0
episode=117/200: episode_reward=412.0, avg_reward=240.6
episode=118/200: episode_reward=274.0, avg_reward=277.4
episode=119/200: episode_reward=227.0, avg_reward=262.1
episode=120/200: episode_reward=301.0, avg_reward=250.4
episode=121/200: episode_reward=256.0, avg_reward=256.2
episode=122/200: episode_reward=274.0, avg_reward=301.5
episode=123/200: episode_reward=248.0, avg_reward=300.2
episode=124/200: episode_reward=399.0, avg_reward=382.9
episode=125/200: episode_reward=289.0, avg_reward=421.2
episode=126/200: episode_reward=331.0, avg_reward=429.5
episode=127/200: episode_reward=361.0, avg_reward=317.0
episode=128/200: episode_reward=375.0, avg_reward=402.8
episode=129/200: episode_reward=500.0, avg_reward=467.3
episode=130/200: episode_reward=500.0, avg_reward=458.0
episode=131/200: episode_reward=500.0, avg_reward=409.7
episode=132/200: episode_reward=330.0, avg_reward=288.6
episode=133/200: episode_reward=243.0, avg_reward=242.5
episode=134/200: episode_reward=218.0, avg_reward=321.9
episode=135/200: episode_reward=500.0, avg_reward=295.6
episode=136/200: episode_reward=299.0, avg_reward=257.0
episode=137/200: episode_reward=285.0, avg_reward=264.3
episode=138/200: episode_reward=308.0, avg_reward=237.8
episode=139/200: episode_reward=292.0, avg_reward=265.6
episode=140/200: episode_reward=232.0, avg_reward=441.0
episode=141/200: episode_reward=304.0, avg_reward=325.3
episode=142/200: episode_reward=214.0, avg_reward=333.5
episode=143/200: episode_reward=299.0, avg_reward=251.8
episode=144/200: episode_reward=352.0, avg_reward=313.8
episode=145/200: episode_reward=274.0, avg_reward=292.6
episode=146/200: episode_reward=333.0, avg_reward=327.2
episode=147/200: episode_reward=314.0, avg_reward=392.9
episode=148/200: episode_reward=396.0, avg_reward=266.7
episode=149/200: episode_reward=275.0, avg_reward=223.6
episode=150/200: episode_reward=316.0, avg_reward=429.8
episode=151/200: episode_reward=313.0, avg_reward=235.6
episode=152/200: episode_reward=276.0, avg_reward=413.5
episode=153/200: episode_reward=500.0, avg_reward=390.3
episode=154/200: episode_reward=350.0, avg_reward=387.6
episode=155/200: episode_reward=361.0, avg_reward=397.2
episode=156/200: episode_reward=294.0, avg_reward=423.8
episode=157/200: episode_reward=456.0, avg_reward=349.2
episode=158/200: episode_reward=293.0, avg_reward=377.8
episode=159/200: episode_reward=403.0, avg_reward=382.7
episode=160/200: episode_reward=324.0, avg_reward=301.8
episode=161/200: episode_reward=286.0, avg_reward=228.8
episode=162/200: episode_reward=225.0, avg_reward=260.7
episode=163/200: episode_reward=346.0, avg_reward=236.0
episode=164/200: episode_reward=316.0, avg_reward=213.1
episode=165/200: episode_reward=175.0, avg_reward=10.5
episode=166/200: episode_reward=10.0, avg_reward=10.5
episode=167/200: episode_reward=13.0, avg_reward=12.5
episode=168/200: episode_reward=12.0, avg_reward=16.6
episode=169/200: episode_reward=200.0, avg_reward=344.1
episode=170/200: episode_reward=215.0, avg_reward=321.9
episode=171/200: episode_reward=437.0, avg_reward=265.2
episode=172/200: episode_reward=253.0, avg_reward=315.0
episode=173/200: episode_reward=462.0, avg_reward=257.3
episode=174/200: episode_reward=270.0, avg_reward=354.1
episode=175/200: episode_reward=347.0, avg_reward=390.2
episode=176/200: episode_reward=371.0, avg_reward=442.4
episode=177/200: episode_reward=474.0, avg_reward=451.5
episode=178/200: episode_reward=447.0, avg_reward=422.7
episode=179/200: episode_reward=430.0, avg_reward=407.6
episode=180/200: episode_reward=500.0, avg_reward=372.9
episode=181/200: episode_reward=278.0, avg_reward=415.8
episode=182/200: episode_reward=437.0, avg_reward=364.0
episode=183/200: episode_reward=500.0, avg_reward=353.6
episode=184/200: episode_reward=440.0, avg_reward=376.1
episode=185/200: episode_reward=271.0, avg_reward=377.4
episode=186/200: episode_reward=500.0, avg_reward=308.3
episode=187/200: episode_reward=264.0, avg_reward=437.9
episode=188/200: episode_reward=500.0, avg_reward=410.2
episode=189/200: episode_reward=283.0, avg_reward=299.0
episode=190/200: episode_reward=500.0, avg_reward=366.9
episode=191/200: episode_reward=500.0, avg_reward=480.9
episode=192/200: episode_reward=357.0, avg_reward=380.8
episode=193/200: episode_reward=500.0, avg_reward=451.9
episode=194/200: episode_reward=500.0, avg_reward=475.2
episode=195/200: episode_reward=500.0, avg_reward=381.2
episode=196/200: episode_reward=408.0, avg_reward=390.1
episode=197/200: episode_reward=348.0, avg_reward=448.6
episode=198/200: episode_reward=500.0, avg_reward=410.6
episode=199/200: episode_reward=342.0, avg_reward=313.8
episode=200/200: episode_reward=448.0, avg_reward=497.9
Completed agent training in 3647.03733301 sec.
In [ ]:
Content source: haoyio/deep-rl
Similar notebooks: