Baseline: Reflex Control

Steers the car with a PD controller acting on the estimated track-median offset $y_m$.
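
At each step the controller computes the signal $f = \mathrm{steer} + k_p\, y_m + k_v\, \Delta y_m / \Delta t$ and maps it to a discrete action: steer one way when $f > \epsilon$, the other way when $f < -\epsilon$, and keep the current steering otherwise; when $v < 2.5$ the accelerating variant of the action is selected. The cell below implements this loop.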


In [13]:
from DriveItGym import *
import numpy as np
from numpy import pi

num_episodes = 1        # number of episodes to run
time_limit = 10.0       # episode time limit
throttle_limit = 0.75   # maximum throttle, as a fraction of full throttle
gamma = 0.98            # discount factor
epsilon = 0.1           # dead band on the steering control signal
kp, kv = 1.0, 0.3       # proportional and derivative gains
random_position = False # start from the fixed initial position

env = DriveItEnv(time_limit, throttle_limit, gamma, show_belief_state=True,
                 noisy=True, trail_length=3.3)
observation = env._reset(random_position)
ym_ = 0.0   # previous median offset, for the derivative term
done = False
reward_episode = 0
reward_sum = 0.0
crashes = 0
obs, pos, err = [], [], []   # logs for the plots below

for i_episode in range(1, num_episodes + 1):
    while not done:
        if num_episodes < 2 and time_limit < 60.0:
            env.render()

        xm, ym, theta, steer, v, bl = observation

        # PD control signal on the median offset
        dym = ym - ym_
        ym_ = ym
        f = steer + kp * ym + kv * dym / dt

        # threshold the signal into discrete steering actions
        if f > epsilon: action = 2
        elif f < -epsilon: action = 1
        else: action = 0

        # while slow, pick the accelerating variant of the action
        if v < 2.5:
            action += 3
            
        # log ground truth (sn), belief (b) and estimation error for the plots below
        s = env.state
        sn = np.array((s[5], s[6], s[2], 0, s[4]*v_max, 0.0))
        b = np.array(env.belief)
        e = b - sn  # (xm-sn[0], ym-sn[1], theta-sn[2], 0, v-sn[4])
        obs.append(env.belief)
        pos.append(sn)
        err.append(e)

        observation, reward, done, info = env.step(action)
        reward_episode += reward
        #reward_sum = reward + gamma * reward_sum

    if num_episodes <= 10:
        print('Episode %d %s. reward: %f, laps: %f' % \
              (i_episode, info['done'], reward_episode, \
               reward_episode / lap_median_length))

    if info['done'] != 'complete':
        crashes += 1

    reward_sum += reward_episode
    reward_episode = 0
    observation = env._reset(random_position)
    done = False

if num_episodes > 1:
    print('Average reward: %f with %d crashes' % \
          (reward_sum / num_episodes, crashes))


Episode 1 complete. reward: 15.243222, laps: 3.027878

In [6]:
env.close()

In [14]:
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('ggplot')
%matplotlib inline
sns.set()
y = plt.plot(obs)
#plt.xlabel('time');plt.ylabel('error')



In [15]:
i = 1   # component to plot (index 1 is the median offset y_m)
y = plt.plot([e[i] if abs(e[i]) < 1.0 else 0 for e in err])  # zero out large spikes for readability
#plt.plot([o[i] for o in obs])



In [16]:
y = plt.plot([p[i] for p in pos])  # ground truth
y = plt.plot([o[i] for o in obs])  # belief



In [6]:
env._reset(random_position)


Out[6]:
array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [7]:
env.render()

In [22]:
# out-of-track penalty = maximum discounted reward for driving median laps at full speed
rmax = throttle_limit * v_max * dt / (1 - gamma)
rmax


Out[22]:
2.0833333333333313
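
This is the geometric-series bound on the return: assuming the car collects a reward of roughly $\text{throttle\_limit} \cdot v_{max} \cdot dt$ per step when lapping the median at the throttle-limited top speed, the discounted sum is $\sum_{t=0}^{\infty} \gamma^t \, \text{throttle\_limit} \cdot v_{max} \cdot dt = \text{throttle\_limit} \cdot v_{max} \cdot dt \,/\, (1 - \gamma)$, which is the value computed above.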

In [9]:
from filter import *
f = LowPassFilter(np.array([0.8, 0.5, 0.0, 0.3, 0.7]), np.array([1.0, 2, 3, 4, 5]))
f.filter([1,2,3,4,5])


Out[9]:
array([ 1.,  2.,  3.,  4.,  5.])

In [26]:
f.filter([1,2,3,4,5])


Out[26]:
array([ 1.        ,  4.66601562,  3.        ,  4.        ,  5.        ])

In [27]:
f.filter([1,10,10,4,5])


Out[27]:
array([  1.        ,   7.33300781,  10.        ,   4.        ,   5.        ])
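
The outputs above are consistent with a first-order low-pass $y[n] = (1 - g)\, x[n] + g\, y[n-1]$ applied element-wise: for the second element, whose gain is $0.5$, the last call gives $0.5 \cdot 10 + 0.5 \cdot 4.666 \approx 7.333$, matching Out[27]. A sketch of a filter with this structure follows the gain-setup cell below.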

In [2]:
import numpy as np
g = 2.0
i = np.array([1.0, 2.0])
o = np.ones_like(i)
i * g


Out[2]:
array([ 2.,  4.])

In [7]:
import numpy as np
gain = 0.85
initial_value = 0.0
feedback_gain = np.ones_like(initial_value) * gain  # feedback gain g, shaped like the state
output_gain = 1.0 - feedback_gain                   # output gain 1 - g
output = initial_value
feedback_value = initial_value / output_gain        # internal state scaled by 1 / (1 - g)
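
The cell above lays out the pieces a first-order low-pass filter needs: a feedback gain $g$, an output gain $1 - g$, and an internal feedback value initialised to initial_value / (1 - g). A minimal sketch along these lines (an assumption about what filter.LowPassFilter implements; the module itself is not shown here):

import numpy as np

class ExpLowPassFilter:
    """First-order exponential low-pass: y[n] = (1 - g) * x[n] + g * y[n - 1]."""

    def __init__(self, feedback_gain, initial_value):
        self.feedback_gain = np.asarray(feedback_gain, dtype=float)
        self.output_gain = 1.0 - self.feedback_gain
        # internal state holds the previous output scaled by 1 / (1 - g)
        self.feedback_value = np.asarray(initial_value, dtype=float) / self.output_gain

    def filter(self, x):
        # s[n] = x[n] + g * s[n-1]  =>  y[n] = (1 - g) * s[n]
        self.feedback_value = np.asarray(x, dtype=float) + self.feedback_gain * self.feedback_value
        return self.output_gain * self.feedback_value

With the gains and initial values used in In [9] above, the first call filter([1, 2, 3, 4, 5]) returns the input unchanged, as in Out[9].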

In [1]:
import numpy as np

def plop(feedback_gain, initial_value=np.array([0.0])):
    feedback_gain = np.ones_like(initial_value) * feedback_gain
    print(feedback_gain)
plop(0.85)


[ 0.85]
