In [1]:
from DriveItMultiGym import *
from car import *
from sensors import *
from belief import *
import numpy as np
from numpy import pi
num_episodes = 1     # number of episodes to run
time_limit = 10.0    # seconds per episode
gamma = 0.995        # discount factor
epsilon = 0.1        # dead-band of the steering control law
kp, kv = 30.0, 10.0  # proportional and derivative steering gains
speed_limit = 1.5
random_position = False
cars = [Car.HighPerf(Color.green, speed_limit),
        Car.Simple(Color.orange, 1.0),
        Car.Simple(Color.purple, 1.2)]
pos = [PositionTracking(car) for car in cars]
env = DriveItEnv(cars, time_limit, gamma, noisy=True)
observations = env._reset(random_position)
beliefs = PositionTracking.reset_all(pos, observations)
ym_ = {c: 0.0 for c in cars}  # previous lateral offset, per car
done = False
reward_episode = {c: 0.0 for c in cars}
reward_sum = {c: 0.0 for c in cars}
crashes = 0
actions = {}
carpos = []; obs = []; belpos = []; bel = []; err = []  # traces for the plots below
for i_episode in range(1, num_episodes + 1):
    while not done:
        if num_episodes < 2 and time_limit < 60.0:
            env.render()

        # pick an action for each car from its position belief;
        # dt (the simulation time step) comes from the star imports above
        for c, b in beliefs.items():
            xm, ym, thetam, v, K = b
            ym_dot = (ym - ym_[c]) / dt
            ym_[c] = ym
            # steering law: median curvature plus a PD term on the lateral offset
            f = K + kp * ym + kv * ym_dot
            if f > epsilon:
                action = 2
            elif f < -epsilon:
                action = 1
            else:
                action = 0
            # brake if an obstacle shows up on the distance sensors
            for j in range(min(3, len(c.dist_sensors))):
                sensor = c.dist_sensors[j]
                d = sensor.read(cars)
                if d < 0.18 or (j == 0 and d < 0.5 and xm > loop_median_length):
                    action += 6
                    break
            # otherwise accelerate while under the speed limit
            if action < 6 and v < speed_limit:
                action += 3
            actions[c] = action

        # record traces of the first car for the plots below
        carpos.append(cars[0].position)
        #obs.append(observations[cars[0]])
        #belpos.append(pos[0].position)
        bel.append(beliefs[cars[0]])
        e = cars[0].position[:-1] - pos[0].position[:-1]
        e2 = wrap(cars[0].position[2] - beliefs[cars[0]][2], -pi, pi)
        err.append((e[0], e[1], e2))

        observations, rewards, done, info = env.step(actions)
        beliefs = PositionTracking.update_all(pos, observations, dt)
        for car in cars:
            reward_episode[car] += rewards[car]

    if num_episodes <= 10:
        print('Episode %d %s. reward: %f, laps: %f'
              % (i_episode, info['done'], reward_episode[cars[0]],
                 reward_episode[cars[0]] / lap_median_length))
    if info['done'] != 'complete':
        crashes += 1
    for car in cars:
        reward_sum[car] += reward_episode[car]
        reward_episode[car] = 0
    observations = env._reset(random_position)
    beliefs = PositionTracking.reset_all(pos, observations)
    done = False

if num_episodes > 1:
    print('Average reward: %f with %d crashes'
          % (reward_sum[cars[0]] / num_episodes, crashes))
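The loop above folds steering, throttle, and braking into one integer action. A minimal restatement of that policy as a standalone function, assuming the encoding inferred from the loop (0-2 pick the steering from the dead-band test, +3 accelerates, +6 brakes); the function name and argument list are illustrative, not part of the environment's API:
In [ ]:
# Hedged sketch of the heuristic policy used in the main loop.
def heuristic_action(f, v, obstacle, epsilon=0.1, speed_limit=1.5):
    """f: steering control value, v: speed, obstacle: any sensor tripped."""
    if f > epsilon:
        action = 2      # steer one way
    elif f < -epsilon:
        action = 1      # steer the other way
    else:
        action = 0      # hold straight
    if obstacle:
        return action + 6   # brake, overriding the throttle
    if v < speed_limit:
        return action + 3   # accelerate while under the speed limit
    return action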
In [2]:
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('ggplot')
%matplotlib inline
sns.set()
In [3]:
from DriveItCircuit import *
belpos = [b[0:3] for b in bel]                                    # believed pose, median frame
belcart = [median_to_cartesian(s[0], s[1], s[2]) for s in bel]    # believed pose, cartesian frame
median = [cartesian_to_median(p[0], p[1], p[2]) for p in carpos]  # true pose, median frame
#medcart = [median_to_cartesian(s[0],s[1],s[2]) for s in median]
cerr = [carpos[i] - belcart[i] for i in range(len(carpos))]       # cartesian tracking error
merr = [(median[i][0] - belpos[i][0],
         median[i][1] - belpos[i][1],
         median[i][2] - belpos[i][2]) for i in range(len(median))]
# unwrap lap crossings: a jump of a full lap in the distance error is an
# artifact of the circular median coordinate, not a tracking error
merr = [merr[i] if merr[i][0] < 5.0
        else (merr[i][0] - lap_median_length, merr[i][1], merr[i][2])
        for i in range(len(merr))]
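The 5.0 threshold above only catches crossings in one direction. A symmetric alternative, assuming wrap(x, lo, hi) folds its argument into the given interval, as its wrap(angle, -pi, pi) use in the main loop suggests:
In [ ]:
# Sketch: fold the median-distance error into half a lap either way.
half_lap = lap_median_length / 2
merr = [(wrap(me[0], -half_lap, half_lap), me[1], me[2]) for me in merr]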
In [4]:
y = plt.plot(cerr)     # cartesian tracking error
In [5]:
y = plt.plot(merr)     # median-frame tracking error
In [6]:
y = plt.plot([pos[0].normalize(b) for b in bel])  # belief components, normalized by the tracker
In [7]:
y = plt.plot(median)   # true pose, median frame
In [8]:
y = plt.plot(belpos)   # believed pose, median frame
In [9]:
y = plt.plot(carpos)   # true pose, cartesian frame
In [10]:
y = plt.plot(belcart)  # believed pose, cartesian frame
In [ ]:
x = [c[0] for c in belcart]
y = [c[1] for c in belcart]
plt.plot(x, y)  # believed path in the plane
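The true and believed paths are easier to compare overlaid in a single figure; this uses only plain matplotlib and the traces collected above:
In [ ]:
# Overlay the actual trajectory and the belief in cartesian space.
plt.plot([p[0] for p in carpos], [p[1] for p in carpos], label='actual')
plt.plot([b[0] for b in belcart], [b[1] for b in belcart], label='belief')
plt.axis('equal')   # keep the track geometry undistorted
plt.legend()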
In [5]:
med = [(s[0], s[1]) for s in bel]  # believed (distance, offset) pairs
xm = [c[0] for c in med]
ym = [c[1] for c in med]
plt.plot(xm, ym)  # believed path in the median frame
In [6]:
i = 1
# note: carpos is in the cartesian frame and belpos in the median frame,
# so these two curves are not directly comparable; see the sketch below
y = plt.plot([p[i] for p in carpos])
y = plt.plot([p[i] for p in belpos])
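A like-for-like comparison keeps both curves in the median frame, using the median trace computed from carpos above. The component names are an assumption based on how xm, ym, and thetam are used in the main loop:
In [ ]:
# Compare true and believed poses component by component, both in median space.
labels = ['distance', 'offset', 'heading']   # assumed component layout
for i in range(3):
    plt.figure()
    plt.plot([m[i] for m in median], label='actual ' + labels[i])
    plt.plot([b[i] for b in belpos], label='belief ' + labels[i])
    plt.legend()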
In [7]:
y = plt.plot([p[:-1] for p in carpos])  # true x and y only
In [12]:
y = plt.plot([p[:-1] for p in belpos])  # believed distance and offset only
In [14]:
y = plt.plot(bel)  # full belief traces
In [44]:
y = plt.plot(obs)  # empty unless obs.append(...) is uncommented in the main loop
In [11]:
env._reset(random_position)
In [12]:
env.render()
In [45]:
# out-of-track penalty = maximum discounted return for lapping the median
# at full speed: a per-step reward of 2.5 * dt summed as a geometric
# series gives 2.5 * dt / (1 - gamma)
2.5 * dt / (1 - gamma)
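As a sanity check, the closed form is just the geometric series sum of r * gamma**t, which converges to r / (1 - gamma); truncating at a few thousand steps reproduces it numerically:
In [ ]:
# Verify the closed form against a truncated geometric series.
r = 2.5 * dt                              # assumed per-step reward at full speed
approx = sum(r * gamma**t for t in range(10000))
approx, r / (1 - gamma)                   # should agree to many digits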
In [13]:
c = cars[0]
isinstance(c.front_left, Part)