In [7]:
    
%load_ext autoreload
%autoreload 2
%matplotlib inline
    
    
In [8]:
    
import numpy as np
import tempfile
import tensorflow as tf
from tf_rl.controller import DiscreteDeepQ, HumanController
from tf_rl.controller import GreedyController, RandomController, WeightedRandomController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from __future__ import print_function
    
In [22]:
    
# Settings dictionary handed to KarpathyGame when the simulation is created.
# Per-type entries ("colors", "object_reward", "num_objects") are keyed by the
# names listed under "objects"; "colors" additionally has an entry for "hero".
current_settings = {
    "objects": ["friend", "enemy"],
    "colors": {
        "hero": "yellow",
        "friend": "green",
        "enemy": "red",
    },
    "object_reward": {
        "friend": 0.1,
        "enemy": -0.1,
    },
    "hero_bounces_off_walls": False,
    "world_size": (700, 500),
    "hero_initial_position": [400, 300],
    "hero_initial_speed": [0, 0],
    "maximum_speed": [50, 50],
    "object_radius": 10.0,
    "num_objects": {
        "friend": 25,
        "enemy": 25,
    },
    "num_observation_lines": 32,
    # An earlier experiment used 120.0 for the observation line length.
    "observation_line_length": 80.0,
    "tolerable_distance_to_wall": 50,
    # Negative zero is kept exactly as written in the original settings.
    "wall_distance_penalty": -0.0,
    "delta_v": 50,
}
    
In [23]:
    
# Instantiate the game simulator from the settings dictionary.
g = KarpathyGame(current_settings)

# Greedy controller: receives a mapping from byte-string keys to integers
# (presumably keyboard key -> action index; confirm against GreedyController)
# together with the number of observation lines configured for the game.
current_controller = GreedyController(
    {b"w": 3, b"d": 0, b"s": 1, b"a": 2},
    current_settings["num_observation_lines"],
)
    
In [24]:
    
# Parameters forwarded to simulate() below.
FPS, ACTION_EVERY = 30, 3

# fast_mode only flips WAIT (passed as simulate's `wait` argument);
# VISUALIZE_EVERY is 1 in both modes.
fast_mode = True
WAIT = not fast_mode
VISUALIZE_EVERY = 1

# Run the simulation under the CPU device scope with training disabled and
# nothing saved; a keyboard interrupt stops the run without a traceback.
try:
    with tf.device("/cpu:0"):
        simulate(
            simulation=g,
            controller=current_controller,
            fps=FPS,
            visualize_every=VISUALIZE_EVERY,
            action_every=ACTION_EVERY,
            wait=WAIT,
            disable_training=True,
            simulation_resolution=0.001,
            max_frame=3000,
            save_path=None,
        )
except KeyboardInterrupt:
    print("Interrupted")
    
    
    
Starting with the ray pointing all the way right, we have one row per ray in clockwise order. The numbers for each ray are the following:
Finally, the last two numbers in the representation correspond to the speed of the hero.
In [31]:
    
# Print the game's current observation vector as one row per observation ray,
# followed by the trailing values that are not part of any ray.

# Re-point the existing instance at the current KarpathyGame class object
# (presumably so code reloaded by %autoreload applies to `g` -- confirm).
g.__class__ = KarpathyGame

# Show every float with two decimals (this changes numpy's global print options).
np.set_printoptions(formatter={'float': (lambda x: '%.2f' % (x,))})

x = g.observe()

# Split the vector at ONE index. Previously the row count was derived from
# x[:-2] while the data being reshaped was x[:-4]; the two only agreed because
# of floor division and would disagree for eye_observation_size <= 2.
num_trailing = 4
ray_values, trailing_values = x[:-num_trailing], x[-num_trailing:]

# One row per ray, g.eye_observation_size numbers per row.
new_shape = (ray_values.shape[0] // g.eye_observation_size, g.eye_observation_size)
print(ray_values.reshape(new_shape))
print(trailing_values)

# Last expression of the cell: rendered as the cell output.
g.to_html()
    
    
    Out[31]:
In [ ]: