In [0]:
# !apt-get install python-opengl -y >/dev/null
# !apt install xvfb -y >/dev/null
In [0]:
# !pip install pyvirtualdisplay >/dev/null
# !pip install pyglet >/dev/null
In [3]:
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()
Out[3]:
In [0]:
# !pip install git+https://github.com/openai/baselines >/dev/null
# !pip install gym >/dev/null
In [0]:
# !pip install JSAnimation >/dev/null
In [0]:
%matplotlib inline
from JSAnimation.IPython_display import display_animation
from matplotlib import animation
import matplotlib.pyplot as plt
from IPython.display import display
def display_frames_as_gif(frames):
    """
    Displays a list of frames as a gif, with controls.
    """
    plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=144)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
    display(display_animation(anim, default_mode='once'))
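JSAnimation predates matplotlib's built-in HTML animation export; if the package is unavailable, the variant below is a rough drop-in sketch that relies only on matplotlib (assumes matplotlib >= 2.1, which added Animation.to_jshtml; display_frames_as_html is a hypothetical name):
In [0]:
# from IPython.display import HTML
# def display_frames_as_html(frames):
#     plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=144)
#     patch = plt.imshow(frames[0])
#     plt.axis('off')
#     def animate(i):
#         patch.set_data(frames[i])
#     anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
#     plt.close()  # suppress the static figure; only the animation is shown
#     return HTML(anim.to_jshtml())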
In [0]:
!rm -rf /content/logs/dqn  # clear any previous run's logs (no error if absent)
In [8]:
%env OPENAI_LOGDIR=/content/logs/dqn
# %env OPENAI_LOG_FORMAT=csv
In [9]:
%env
Out[9]:
In [10]:
import gym
from baselines import deepq
env = gym.make("MsPacman-v0")
# https://en.wikipedia.org/wiki/Q-learning#Influence_of_variables
# lr: Adam learning rate; buffer_size: replay-buffer capacity;
# exploration_fraction: fraction of total_timesteps over which epsilon is
# annealed from 1.0 down to exploration_final_eps; print_freq: episodes
# between progress printouts.
# %time model = deepq.learn(\
# env,\
# seed=42,\
# network='mlp',\
# lr=1e-3,\
# total_timesteps=100000,\
# buffer_size=50000,\
# exploration_fraction=0.1,\
# exploration_final_eps=0.02,\
# print_freq=10)
%time model = deepq.learn(\
env,\
seed=42,\
network='cnn',\
lr=1e-3,\
total_timesteps=50000,\
buffer_size=50000,\
exploration_fraction=0.5,\
exploration_final_eps=0.02,\
print_freq=10)
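To avoid retraining from scratch, the learned policy can be persisted; a minimal sketch, assuming the ActWrapper returned by deepq.learn exposes save() and that learn accepts a load_path argument, as in recent baselines versions (the .pkl path is arbitrary):
In [0]:
# model.save('/content/pacman_dqn.pkl')
# # later: rebuild the graph without training and restore the weights
# model = deepq.learn(env, network='cnn', total_timesteps=0,
#                     load_path='/content/pacman_dqn.pkl')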
In [0]:
# from baselines.common import plot_util as pu
# results = pu.load_results('/content/logs/dqn')
In [0]:
# import matplotlib.pyplot as plt
# import numpy as np
# r = results[0]
# plt.plot(r.progress.total_timesteps, r.progress.eprewmean)
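If the csv format is enabled above via OPENAI_LOG_FORMAT=csv, the log can also be read directly; a sketch assuming baselines' csv logger writes progress.csv and that deepq reports 'steps' and 'mean 100 episode reward' columns:
In [0]:
# import pandas as pd
# progress = pd.read_csv('/content/logs/dqn/progress.csv')
# progress.plot(x='steps', y='mean 100 episode reward')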
In [13]:
import numpy as np

observation = env.reset()
# DQN has no recurrent state: model.step accepts S and M for interface
# compatibility but ignores them, so these placeholders are never used.
state = np.zeros((1, 2 * 128))
dones = np.zeros((1,))
frames = []
cumulated_reward = 0
for t in range(1000):
    frames.append(env.render(mode='rgb_array'))
    actions, _, state, _ = model.step(observation, S=state, M=dones)
    observation, reward, done, info = env.step(actions)
    cumulated_reward += reward
    if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t + 1, cumulated_reward))
        break
env.close()
In [14]:
display_frames_as_gif(frames)
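A single rollout is a noisy measure of the policy; averaging the return over several episodes gives a steadier estimate. A minimal sketch (evaluate is a hypothetical helper reusing the same model.step interface, whose S/M arguments DQN ignores):
In [0]:
def evaluate(model, episodes=5, max_steps=1000):
    eval_env = gym.make("MsPacman-v0")
    state = np.zeros((1, 2 * 128))
    dones = np.zeros((1,))
    returns = []
    for _ in range(episodes):
        obs = eval_env.reset()
        total = 0.0
        for _ in range(max_steps):
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, reward, done, _ = eval_env.step(actions)
            total += reward
            if done:
                break
        returns.append(total)
    eval_env.close()
    return np.mean(returns)

# evaluate(model)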
In [0]: