In [19]:
using PyPlot
In [1]:
include("sim.jl"); #include the simulator
include("floopMap.jl") #include mapping
In [2]:
nSeg=5 #number of segments
aStep=3 #angle step
aLimit=60 #angle limit
res=10 #resolution of segment
l=1/res #set subsegment length so that segment length = 1
Out[2]:
In [70]:
### Begin continuous state space code
# s: 5-element vector of joint angles (one per segment)
# a: 5-element vector of actions, one per segment -> currently a[i] ∈ {-1, 0, 1}
# w: currently a 9-element vector of weights for the value approximation
BasisSize = 9
global GoalPos, GoalThresh
GoalPos = [4.5,1]
GoalThresh = 0.3
goal=Zone(GoalPos', GoalThresh) #create zone object for goal
Out[70]:
In [71]:
drawZone(goal)
In [13]:
function ContReward(s)
    eDec = 2 # exponential decay factor in the goal proximity
    EEpos = ccEnd(s)
    goalDist = norm(GoalPos - EEpos')
    r = (goalDist < GoalThresh) ? 1000 : 0 # if within threshold of goal, return full reward; otherwise zero
    #r = (goalDist < GoalThresh) ? 1000 : exp(-eDec*goalDist) # alternative: negative exponential of distance to goal
    return r
end
Out[13]:
In [12]:
function ContTrans(s,a)
    #currently deterministic, finite action space
    return s + a #can make this probabilistic by sampling from a distribution (see the sketch below)
end
Out[12]:
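The transition above is deterministic. If the probabilistic variant mentioned in the comment is wanted later, a minimal sketch is below; the zero-mean Gaussian noise and its standard deviation σ are assumptions, not part of the current model, and kMax would then need to be raised to average over several sampled successors.
In [ ]:
function ContTransStoch(s, a; σ=1.0)
    # sketch only: apply the action, then perturb each joint with zero-mean
    # Gaussian noise of standard deviation σ (same angle units as s)
    return s + a + σ*randn(size(s))
end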
In [6]:
function ValApprox(w, s)
    # features: EEpos, goalDist, s, const. (currently 9 elements, matching BasisSize) -> add ObstDist when obstacles added. And other bases?
    return sum(w.*BasisFun(s))
end
Out[6]:
In [11]:
function BasisFun(s)
    #EEpos = ccQuick(s, 1)[end,:]
    EEpos = ccEnd(s)
    goalDist = norm(GoalPos' - EEpos)
    return [EEpos'; goalDist; s'; 1]
end
Out[11]:
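As a quick sanity check, BasisFun should return exactly BasisSize features for a state given as a 1×5 row vector (the layout used throughout this notebook); the test state below is arbitrary.
In [ ]:
sTest = [10 -20 30 -40 50]            # arbitrary joint angles, row vector
length(BasisFun(sTest)) == BasisSize  # expect true: 2 (EEpos) + 1 (goalDist) + 5 (s) + 1 (const) = 9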
In [45]:
m = 1000 # number of state samples
w = zeros(BasisSize) #initialize weights
aSize = 243 #number of possible actions (3^5)
kMax = 1 # number of samples of probabilistic transition - currently 1 because deterministic transition
γ = 0.95 # discount factor
y = zeros(m)
action = [0 0 0 0 0]
#println(size(w))
#println(size(action))
stateMat = zeros(m,5)
for i = 1:m # set up this way so can change state initialization based on trajectory following
    stateMat[i, :] = [rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90)]
end
for iters = 1:50
    tic()
    A = zeros(m,BasisSize)
    for i = 1:m
        q = zeros(aSize)
        state = stateMat[i,:]
        A[i,:] = BasisFun(state)
        for j = 1:aSize
            action[1],action[2],action[3],action[4],action[5] = ind2sub((3,3,3,3,3),j)
            action -= 2 # map indices 1:3 to actions -1, 0, 1 on each segment
            for k = 1:kMax
                #println(ContReward(state))
                #println(ValApprox(w,ContTrans(state,action)))
                #println(w)
                q[j] += (ContReward(state) + γ*ValApprox(w,ContTrans(state,action)))/kMax
                #println("ok")
            end
        end
        y[i] = maximum(q)
    end
    wp = (pinv(A)*y)
    println(norm(wp - w))
    w = wp
    toc()
end
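The cell above is linear fitted value iteration: for each sampled state sᵢ it computes the backup yᵢ = maxₐ [ R(sᵢ) + γ·V_w(T(sᵢ, a)) ], where V_w(s) = wᵀ·BasisFun(s), and then refits the weights by least squares, w ← pinv(A)·y, with row i of A equal to BasisFun(sᵢ)ᵀ. The printed norm(wp - w) tracks how much the weights still change from one sweep to the next.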
In [61]:
w
Out[61]:
In [59]:
s = [rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90)]
a = zeros(1,5)
nsteps=2000
traj = zeros(nsteps,5)
for i = 1:nsteps
    #println(norm(ccQuick(map(deg2rad, s), 1)[end,:] - [4.5 1]))
    traj[i,:] = s
    q = zeros(aSize)
    for j = 1:aSize
        action[1],action[2],action[3],action[4],action[5] = ind2sub((3,3,3,3,3),j)
        action -= 2
        q[j] += (ContReward(s) + γ*ValApprox(w,ContTrans(s,action)))
    end
    #println(q)
    a[1],a[2],a[3],a[4],a[5] = ind2sub((3,3,3,3,3),findmax(q)[2])
    a -= 2
    #println(a)
    s = ContTrans(s,a)
end
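This rollout repeatedly applies the greedy action a* = argmaxₐ [ R(s) + γ·V_w(T(s, a)) ] and stores the visited states in traj. Because R(s) here does not depend on the action, the argmax is effectively taken over V_w(T(s, a)) alone.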
In [60]:
(p,e)=ccArm2(traj[1, :])
drawArm(p, e)
(p,e)=ccArm2(traj[end, :])
drawArm(p, e)
drawZone(goal)
title("start and end")
xlabel("x")
ylabel("y")
xlim(-0, 7)
ylim(-3.5, 3.5)
Out[60]:
In [43]:
hold("on")
scale=10
for i=1:nsteps
if i%(nsteps/scale)==0
(p,e)=ccArm2(traj[i, :])
drawArm(p, e)
drawZone(goal)
title("itr: $(i)")
xlabel("x")
ylabel("y")
xlim(-0, 7)
ylim(-3.5, 3.5)
end
end
In [26]:
using PyPlot
plot(p[:,1], p[:,2], "k")
Out[26]: