In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [ ]:
from __future__ import print_function
import numpy as np
from tf_rl.controller import DiscreteDeepQ, NL, HumanController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
In [ ]:
# Configuration for the game world: object types, rendering colors,
# collision rewards, world geometry, and hero movement parameters.
current_settings = {
    # Types of collectible objects present in the world.
    'objects': ['friend', 'enemy'],
    # Rendering colors for each entity type.
    'colors': {
        'hero': 'yellow',
        'friend': 'green',
        'enemy': 'red',
    },
    # Reward the hero receives for colliding with each object type.
    'object_reward': {
        'friend': 0.1,
        'enemy': -0.1,
    },
    'hero_bounces_off_walls': False,
    'world_size': (700, 500),
    'hero_initial_position': [400, 300],
    'hero_initial_speed': [0, 0],
    'maximum_speed': [50, 50],
    'object_radius': 10.0,
    # Number of each object type spawned in the world.
    'num_objects': {
        'friend': 25,
        'enemy': 25,
    },
    # The hero perceives the world through evenly spaced observation rays.
    'num_observation_lines': 32,
    'observation_line_length': 120.,
    # Wall-proximity penalty (currently disabled: penalty is -0.0).
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -0.0,
    # Velocity change applied per action step.
    'delta_v': 50,
}
In [ ]:
# Create the game simulator from the settings above
# (rewards: +0.1 per 'friend' collision, -0.1 per 'enemy' collision).
g = KarpathyGame(current_settings)
In [ ]:
# Toggle between manual keyboard control and the learned controller.
human_control = False

if not human_control:
    # Deep Q-learning controller: a three-layer MLP mapping observations
    # to per-action Q-values. RMSProp is the optimizer, as recommended
    # by the publication.
    current_controller = DiscreteDeepQ(
        g.observation_size,
        [200, 200, g.num_actions],
        [NL.TANH, NL.TANH, NL.IDENTITY],
        learning_rate=0.001,
        decay=0.9,
        discount_rate=0.99,
        exploration_period=5000,
        max_experience=10000,
        store_every_nth=4,
        train_every_nth=4)
else:
    # WSAD keyboard control (requires extra setup - check out README).
    key_to_action = {b"w": 3, b"d": 0, b"s": 1, b"a": 2}
    current_controller = HumanController(key_to_action)

current_controller.initialize('')
In [ ]:
# Rendering and action-stepping parameters.
FPS = 30
ACTION_EVERY = 3

# Fast mode: never wait between frames and render only occasionally;
# otherwise render every frame in real time.
fast_mode = True
WAIT, VISUALIZE_EVERY = (False, 50) if fast_mode else (True, 1)

# Run the simulation loop until interrupted by the user.
try:
    simulate(
        simulation=g,
        controller=current_controller,
        fps=FPS,
        visualize_every=VISUALIZE_EVERY,
        action_every=ACTION_EVERY,
        wait=WAIT,
        disable_training=False,
        simulation_resolution=0.001,
        save_path=None)
except KeyboardInterrupt:
    print("Interrupted")
In [ ]:
# Plot the accumulated reward curve; smoothing=100 presumably averages
# over a 100-sample window -- confirm against KarpathyGame.plot_reward.
g.plot_reward(smoothing=100)
Starting with the ray pointing all the way to the right, there is one row per ray, listed in clockwise order. The numbers for each ray are the following:
Finally, the last two numbers in the representation correspond to the hero's speed.
In [ ]:
# Reassign the class so that method definitions reloaded via %autoreload
# take effect on the already-existing instance.
g.__class__ = KarpathyGame

# Print floats with two decimal places for readability.
np.set_printoptions(formatter={'float': lambda v: '%.2f' % (v,)})

observation = g.observe()
per_eye = g.eye_observation_size
# All but the last 4 entries are per-ray readings; show one row per ray.
rays = observation[:-4]
print(rays.reshape((rays.shape[0] // per_eye, per_eye)))
# The trailing entries of the observation vector.
print(observation[-4:])
g.to_html()
In [ ]: