Baseline: Reflex Control

Steers the car with a PD controller acting on the estimated track-median offset $y_m$.
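
At each step the controller computes the signal $f = \mathrm{steer} + k_p\, y_m + k_v\, \Delta y_m / \Delta t$ and maps it to a discrete action: steer one way when $f > \epsilon$, the other way when $f < -\epsilon$, and keep the current steering otherwise; when $v < 2.5$ the accelerating variant of the action is selected. The cell below implements this loop.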


In [13]:
from DriveItGym import *
import numpy as np
from numpy import pi

num_episodes = 1        # number of episodes to run
time_limit = 10.0       # episode time limit
throttle_limit = 0.75   # maximum throttle, as a fraction of full throttle
gamma = 0.98            # discount factor
epsilon = 0.1           # dead band on the steering control signal
kp, kv = 1.0, 0.3       # proportional and derivative gains
random_position = False # start from the fixed initial position

env = DriveItEnv(time_limit, throttle_limit, gamma, show_belief_state=True,
                 noisy=True, trail_length=3.3)
observation = env._reset(random_position)
ym_ = 0.0   # previous median offset, for the derivative term
done = False
reward_episode = 0
reward_sum = 0.0
crashes = 0
obs, pos, err = [], [], []   # logs for the plots below

for i_episode in range(1, num_episodes + 1):
    while not done:
        if num_episodes < 2 and time_limit < 60.0:
            env.render()

        xm, ym, theta, steer, v, bl = observation

        # PD control signal on the median offset
        dym = ym - ym_
        ym_ = ym
        f = steer + kp * ym + kv * dym / dt

        # threshold the signal into discrete steering actions
        if f > epsilon: action = 2
        elif f < -epsilon: action = 1
        else: action = 0

        # while slow, pick the accelerating variant of the action
        if v < 2.5:
            action += 3
            
        # log ground truth (sn), belief (b) and estimation error for the plots below
        s = env.state
        sn = np.array((s[5], s[6], s[2], 0, s[4]*v_max, 0.0))
        b = np.array(env.belief)
        e = b - sn  # (xm-sn[0], ym-sn[1], theta-sn[2], 0, v-sn[4])
        obs.append(env.belief)
        pos.append(sn)
        err.append(e)

        observation, reward, done, info = env.step(action)
        reward_episode += reward
        #reward_sum = reward + gamma * reward_sum

    if num_episodes <= 10:
        print('Episode %d %s. reward: %f, laps: %f' % \
              (i_episode, info['done'], reward_episode, \
               reward_episode / lap_median_length))

    if info['done'] != 'complete':
        crashes += 1

    reward_sum += reward_episode
    reward_episode = 0
    observation = env._reset(random_position)
    done = False

if num_episodes > 1:
    print('Average reward: %f with %d crashes' % \
          (reward_sum / num_episodes, crashes))


Episode 1 complete. reward: 15.243222, laps: 3.027878

In [6]:
env.close()

In [14]:
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('ggplot')
%matplotlib inline
sns.set()
y = plt.plot(obs)
#plt.xlabel('time');plt.ylabel('error')



In [15]:
i = 1   # component to plot (index 1 is the median offset y_m)
y = plt.plot([e[i] if abs(e[i]) < 1.0 else 0 for e in err])  # zero out large spikes for readability
#plt.plot([o[i] for o in obs])



In [16]:
y = plt.plot([p[i] for p in pos])  # ground truth
y = plt.plot([o[i] for o in obs])  # belief



In [6]:
env._reset(random_position)


Out[6]:
array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [7]:
env.render()

In [22]:
# out-of-track penalty = maximum discounted reward for driving median laps at full speed
rmax = throttle_limit * v_max * dt / (1 - gamma)
rmax


Out[22]:
2.0833333333333313
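
This is the geometric-series bound on the return: assuming the car collects a reward of roughly $\text{throttle\_limit} \cdot v_{max} \cdot dt$ per step when lapping the median at the throttle-limited top speed, the discounted sum is $\sum_{t=0}^{\infty} \gamma^t \, \text{throttle\_limit} \cdot v_{max} \cdot dt = \text{throttle\_limit} \cdot v_{max} \cdot dt \,/\, (1 - \gamma)$, which is the value computed above.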

In [9]:
from filter import *
f = LowPassFilter(np.array([0.8, 0.5, 0.0, 0.3, 0.7]), np.array([1.0, 2, 3, 4, 5]))
f.filter([1,2,3,4,5])


Out[9]:
array([ 1.,  2.,  3.,  4.,  5.])

In [26]:
f.filter([1,2,3,4,5])


Out[26]:
array([ 1.        ,  4.66601562,  3.        ,  4.        ,  5.        ])

In [27]:
f.filter([1,10,10,4,5])


Out[27]:
array([  1.        ,   7.33300781,  10.        ,   4.        ,   5.        ])
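
The outputs above are consistent with a first-order low-pass $y[n] = (1 - g)\, x[n] + g\, y[n-1]$ applied element-wise: for the second element, whose gain is $0.5$, the last call gives $0.5 \cdot 10 + 0.5 \cdot 4.666 \approx 7.333$, matching Out[27]. A sketch of a filter with this structure follows the gain-setup cell below.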

In [2]:
import numpy as np
g = 2.0
i = np.array([1.0, 2.0])
o = np.ones_like(i)
i * g


Out[2]:
array([ 2.,  4.])

In [7]:
import numpy as np
gain = 0.85
initial_value = 0.0
feedback_gain = np.ones_like(initial_value) * gain  # feedback gain g, shaped like the state
output_gain = 1.0 - feedback_gain                   # output gain 1 - g
output = initial_value
feedback_value = initial_value / output_gain        # internal state scaled by 1 / (1 - g)
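
The cell above lays out the pieces a first-order low-pass filter needs: a feedback gain $g$, an output gain $1 - g$, and an internal feedback value initialised to initial_value / (1 - g). A minimal sketch along these lines (an assumption about what filter.LowPassFilter implements; the module itself is not shown here):

import numpy as np

class ExpLowPassFilter:
    """First-order exponential low-pass: y[n] = (1 - g) * x[n] + g * y[n - 1]."""

    def __init__(self, feedback_gain, initial_value):
        self.feedback_gain = np.asarray(feedback_gain, dtype=float)
        self.output_gain = 1.0 - self.feedback_gain
        # internal state holds the previous output scaled by 1 / (1 - g)
        self.feedback_value = np.asarray(initial_value, dtype=float) / self.output_gain

    def filter(self, x):
        # s[n] = x[n] + g * s[n-1]  =>  y[n] = (1 - g) * s[n]
        self.feedback_value = np.asarray(x, dtype=float) + self.feedback_gain * self.feedback_value
        return self.output_gain * self.feedback_value

With the gains and initial values used in In [9] above, the first call filter([1, 2, 3, 4, 5]) returns the input unchanged, as in Out[9].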

In [1]:
import numpy as np

def plop(feedback_gain, initial_value=np.array([0.0])):
    feedback_gain = np.ones_like(initial_value) * feedback_gain
    print(feedback_gain)
plop(0.85)


[ 0.85]
