Baseline: Reflex Control

Steers the car with a PD controller on the estimated track median offset $y_m$, plus a feed-forward term on the estimated track curvature $K$.
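
As a reference, the steering rule in the cell below reduces to the minimal sketch that follows (the function name reflex_steer is illustrative, not part of the environment API; the gains and dead band mirror the constants defined in the cell): a finite-difference estimate of $\dot y_m$, the correction $f = K + k_p\,y_m + k_v\,\dot y_m$, and a dead band of width $\epsilon$ mapping $f$ to one of three discrete steering actions.

def reflex_steer(ym, ym_prev, K, dt, kp=30.0, kv=10.0, epsilon=0.1):
    # finite-difference derivative of the median offset
    ym_dot = (ym - ym_prev) / dt
    # curvature feed-forward plus PD correction on the offset
    f = K + kp * ym + kv * ym_dot
    # dead band of width epsilon: only steer when the correction is large enough
    if f > epsilon:
        return 2   # steer one way
    elif f < -epsilon:
        return 1   # steer the other way
    return 0       # keep the wheels straight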


In [1]:
from DriveItMultiGym import *
from car import *
from sensors import *
from belief import *
import numpy as np
from numpy import pi

num_episodes = 1  # number of episodes to run
time_limit = 10.
gamma = 0.995
epsilon = 0.1
kp, kv = 30.0, 10.0
speed_limit = 1.5

random_position=False

cars = [Car.HighPerf(Color.green, speed_limit),
        Car.Simple(Color.orange, 1.0),
        Car.Simple(Color.purple, 1.2)]

pos = [PositionTracking(car) for car in cars]
env = DriveItEnv(cars, time_limit, gamma, noisy=True)
observations = env._reset(random_position)
beliefs = PositionTracking.reset_all(pos, observations)
ym_ = {c: 0.0 for c in cars}
done = False
reward_episode = {c: 0.0 for c in cars}
reward_sum = {c: 0.0 for c in cars}
crashes = 0
actions = {}

carpos = []; obs = []; belpos = []; bel = []; err = []

for i_episode in range(1, num_episodes + 1):
    while not done:
        if num_episodes < 2 and time_limit < 60.0:
            env.render()

        for c, b in beliefs.items():
            xm, ym, thetam, v, K = b

            # PD term on the median offset, with the curvature K as feed-forward
            ym_dot = (ym - ym_[c]) / dt
            ym_[c] = ym
            f = K + kp * ym + kv * ym_dot
            # dead band of width epsilon selects the discrete steering action
            if f > epsilon: action = 2
            elif f < -epsilon: action = 1
            else: action = 0

            # check the first three distance sensors for nearby obstacles;
            # the first sensor uses a larger margin past the loop section
            for j in range(min(3, len(c.dist_sensors))):
                sensor = c.dist_sensors[j]
                d = sensor.read(cars)
                if d < 0.18 or (j == 0 and d < 0.5 and xm > loop_median_length):
                    action += 6
                    break

            # no obstacle detected and still below the speed limit
            if action < 6 and v < speed_limit:
                action += 3

            actions[c] = action
        
        carpos.append(cars[0].position)
        #obs.append(observations[cars[0]])
        #belpos.append(pos[0].position)
        bel.append(beliefs[cars[0]])
        e = cars[0].position[:-1] - pos[0].position[:-1]
        e2 = wrap(cars[0].position[2] - beliefs[cars[0]][2], -pi, pi)
        err.append((e[0], e[1], e2))

        observations, rewards, done, info = env.step(actions)
        beliefs = PositionTracking.update_all(pos, observations, dt)
        for car in cars:
            reward_episode[car] += rewards[car]

    if num_episodes <= 10:
        print('Episode %d %s. reward: %f, laps: %f' % \
              (i_episode, info['done'], reward_episode[cars[0]], \
               reward_episode[cars[0]] / lap_median_length))

    if info['done'] != 'complete':
        crashes += 1

    for car in cars:
        reward_sum[car] += reward_episode[car]
        reward_episode[car] = 0
    observations = env._reset(random_position)
    beliefs = PositionTracking.reset_all(pos, observations)
    done = False

if num_episodes > 1:
    print('Average reward: %f with %d crashes' % \
          (reward_sum[cars[0]] / num_episodes, crashes))


Episode 1 complete. reward: 7.960224, laps: 1.581200

In [ ]:
env.close()

In [2]:
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('ggplot')
%matplotlib inline
sns.set()

In [3]:
from DriveItCircuit import *
# belief vs. ground truth, in both median (track) and cartesian coordinates
belpos = [b[0:3] for b in bel]
belcart = [median_to_cartesian(s[0],s[1],s[2]) for s in bel]
median = [cartesian_to_median(p[0],p[1],p[2]) for p in carpos]
#medcart = [median_to_cartesian(s[0],s[1],s[2]) for s in median]
# position error in cartesian and in median coordinates
cerr = [carpos[i] - belcart[i] for i in range(len(carpos))]
merr = [(median[i][0] - belpos[i][0], 
         median[i][1] - belpos[i][1],
         median[i][2] - belpos[i][2]) for i in range(len(median))]
# unwrap the longitudinal error when belief and ground truth fall on
# opposite sides of the lap boundary
merr = [merr[i] if merr[i][0] < 5.0 
        else (merr[i][0] - lap_median_length, merr[i][1], merr[i][2]) 
        for i in range(len(merr))]

In [4]:
y = plt.plot(cerr)



In [5]:
y = plt.plot([me for me in merr])



In [6]:
y = plt.plot([pos[0].normalize(b) for b in bel])



In [7]:
y = plt.plot(median)



In [8]:
y = plt.plot(belpos)



In [9]:
y = plt.plot(carpos)



In [10]:
y = plt.plot(belcart)



In [ ]:
x = [c[0] for c in belcart]
y = [c[1] for c in belcart]
plt.plot(x, y)

In [5]:
med = [(s[0],s[1]) for s in bel]
xm = [c[0] for c in med]
ym = [c[1] for c in med]
plt.plot(xm, ym)


Out[5]:
[<matplotlib.lines.Line2D at 0x1a745a802e8>]

In [6]:
i = 1
y = plt.plot([p[i] for p in carpos])
y = plt.plot([p[i] for p in belpos])



In [7]:
y = plt.plot([p[:-1] for p in carpos])



In [12]:
y = plt.plot([p[:-1] for p in belpos])



In [14]:
y = plt.plot([b for b in bel])



In [44]:
y = plt.plot([o for o in obs])



In [11]:
env._reset(random_position)


Out[11]:
[array([-0.30159535,  0.        , -0.88246308,  0.        ,  2.21666667])]

In [12]:
env.render()

In [45]:
# out penalty = max discounted reward for driving the median lap at full speed
2.5 * dt / (1 - gamma)


Out[45]:
8.333333333333325
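
For reference, this is the geometric-series limit of a constant per-step reward, $\sum_{t=0}^{\infty} \gamma^t\, r\, \Delta t = \frac{r\,\Delta t}{1 - \gamma}$, taking $r = 2.5$ as the reward rate per second when lapping the median at full speed (per the comment above); with $\gamma = 0.995$ and the simulation step $\Delta t$ = dt this evaluates to the $\approx 8.33$ shown.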

In [13]:
c = cars[0]
isinstance(c.front_left, Part)


Out[13]:
True
