This is a short, few-line example that uses this fork of the DQN (Deep Q-Network) reinforcement learning algorithm. The fork relies on Keras as its deep learning back end.


In [ ]:
import gym
from dqn import Agent
# Train a DQN agent on MsPacman for a fixed number of episodes, printing the
# total reward and the mean per-frame cost of every episode.
num_episodes = 20
env = gym.make("MsPacman-v0")
agent = Agent(state_size=env.observation_space.shape,
              number_of_actions=env.action_space.n,
              save_name="MsPacman-v0")
try:
    for e in range(num_episodes):
        observation = env.reset()
        done = False
        agent.new_episode()
        # Per-episode accumulators; they are reset at the start of each episode.
        total_cost = 0.0
        total_reward = 0.0
        frame = 0
        while not done:
            frame += 1
            # Call env.render() here to watch the game while training.
            # `values` is returned alongside the action but is not used here.
            action, values = agent.act(observation)
            observation, reward, done, info = env.step(action)
            # NOTE(review): agent.observe() presumably returns the training
            # cost for this step -- confirm against the dqn fork.
            total_cost += agent.observe(reward)
            total_reward += reward
        print("total reward", total_reward)
        # Reported inside the episode loop: the accumulators above are
        # per-episode, so printing after the loop would only ever show the
        # last episode (and fail if num_episodes were 0). `frame` is >= 1
        # here because the inner loop always runs at least once.
        print("mean cost", total_cost/frame)
finally:
    # Release the emulator/environment resources even if training is
    # interrupted or raises.
    env.close()


[2017-03-09 09:24:29,637] Making new env: MsPacman-v0
Training a new model

In [ ]: