In [7]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [8]:
import numpy as np
import tempfile
import tensorflow as tf
from tf_rl.controller import DiscreteDeepQ, HumanController
from tf_rl.controller import GreedyController, RandomController, WeightedRandomController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from __future__ import print_function
In [22]:
# Settings dictionary handed to KarpathyGame below; the controller also reads
# "num_observation_lines" from it.
current_settings = {
    # Object kinds; the same names key the "colors", "object_reward" and
    # "num_objects" tables.
    "objects": ["friend", "enemy"],
    "colors": {
        "hero": "yellow",
        "friend": "green",
        "enemy": "red",
    },
    "object_reward": {
        "friend": 0.1,
        "enemy": -0.1,
    },

    # World and hero set-up.
    "hero_bounces_off_walls": False,
    "world_size": (700, 500),
    "hero_initial_position": [400, 300],
    "hero_initial_speed": [0, 0],
    "maximum_speed": [50, 50],

    # Object population.
    "object_radius": 10.0,
    "num_objects": {
        "friend": 25,
        "enemy": 25,
    },

    # Observation rays (an earlier value for the line length was 120.0).
    "num_observation_lines": 32,
    "observation_line_length": 80.0,

    # Wall handling; the penalty is currently (negative) zero.
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -0.0,

    "delta_v": 50,
}
In [23]:
# Instantiate the game simulator from the settings dictionary.
g = KarpathyGame(current_settings)

# Drive the game with a GreedyController. It is given a table from the
# W/A/S/D key bytes to integer codes (presumably action indices -- confirm
# against tf_rl.controller) and the number of observation lines.
current_controller = GreedyController(
    {b"w": 3, b"d": 0, b"s": 1, b"a": 2},
    current_settings["num_observation_lines"]
)
In [24]:
# Run parameters forwarded to simulate() below.
FPS = 30
ACTION_EVERY = 3

# fast_mode only toggles the `wait` flag; every frame is visualized in
# both modes (VISUALIZE_EVERY is 1 either way).
fast_mode = True
WAIT = not fast_mode
VISUALIZE_EVERY = 1

# Run the simulation pinned to the CPU device, with disable_training=True and
# no save path. Interrupting the kernel stops the run without a traceback.
try:
    with tf.device("/cpu:0"):
        simulate(
            simulation=g,
            controller=current_controller,
            fps=FPS,
            action_every=ACTION_EVERY,
            visualize_every=VISUALIZE_EVERY,
            wait=WAIT,
            disable_training=True,
            simulation_resolution=0.001,
            max_frame=3000,
            save_path=None,
        )
except KeyboardInterrupt:
    print("Interrupted")
Starting with the ray pointing all the way right, we have one row per ray in clockwise order. The numbers for each ray are the following:
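Finally, the last two numbers in the representation correspond to the speed of the hero. (Note: the cell below splits off the last four values, `x[-4:]`, as the trailing block, so the representation may end with more than just the two speed components.)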
In [31]:
# Rebind the existing game instance to the current KarpathyGame class object
# (presumably so it picks up edits reloaded via %autoreload -- confirm).
g.__class__ = KarpathyGame

# Print floats with two decimals. Note: this changes NumPy's global print
# options for the rest of the session.
np.set_printoptions(formatter={'float': (lambda value: '%.2f' % (value,))})

# Flat observation vector: one group of g.eye_observation_size values per
# ray, followed by four trailing values that are printed separately.
x = g.observe()
ray_values = x[:-4]
trailing_values = x[-4:]

# Derive the row count from the same slice that gets reshaped. The previous
# code measured x[:-2] but reshaped x[:-4], which only produced the right
# shape through floor division when eye_observation_size > 2.
new_shape = (ray_values.shape[0] // g.eye_observation_size, g.eye_observation_size)
print(ray_values.reshape(new_shape))
print(trailing_values)

# Last expression of the cell: its value is what the notebook displays.
g.to_html()
Out[31]:
In [ ]: