In [19]:
using PyPlot


Qt: Untested Windows version 10.0 detected!
INFO: Loading help data...

In [1]:
include("sim.jl"); #include the simulator 
include("floopMap.jl") #include mapping


Import successful: sim.jl
Import successful: floopMap.jl

In [2]:
# Arm geometry / discretization parameters
nSeg = 5      # number of segments
aStep = 3     # angle step
aLimit = 60   # presumably the joint-angle limit; not referenced in the cells shown here -- TODO confirm
res = 10      # resolution of a segment
l = 1 / res   # subsegment length, chosen so that each segment has length 1


Out[2]:
0.1

In [70]:
### Begin continuous state space code
# s: 1x5 row of joint angles (states are built as space-separated row literals and
#    BasisFun transposes s before stacking it into the feature vector)
#    NOTE(review): states are sampled from -90:90, presumably degrees -- confirm what ccEnd expects
# a: 1x5 row of actions, one per segment -> currently a[i] ∈ {-1, 0, 1}
# w: currently 9 element vector of weights for value approximation

BasisSize = 9 # must equal the length of the feature vector returned by BasisFun

global GoalPos, GoalThresh
GoalPos = [4.5,1] # goal position that the end-effector position from ccEnd is compared against
GoalThresh = 0.3 # ContReward pays the goal reward when the end effector is closer than this
goal=Zone(GoalPos', GoalThresh) #create zone object for goal


Out[70]:
Zone([4.5,1.0],0.3)

In [71]:
# Draw the goal zone on the current figure.
# NOTE(review): in the recorded run this call failed with "dimensions must match"
# (raised from promote_shape) -- check that the Zone centre has the orientation
# drawZone expects (goal is built from the transposed GoalPos).
drawZone(goal)


dimensions must match
while loading In[71], in expression starting on line 1

 in promote_shape at operators.jl:191 (repeats 2 times)

In [13]:
# Reward of a state: sparse goal reward based on end-effector distance to the goal.
#   s: joint-angle state, passed unchanged to ccEnd
# Returns 1000 when the end effector (ccEnd(s)) is closer to GoalPos than GoalThresh,
# and 0 otherwise.
# A shaped alternative that was tried for the "otherwise" case is exp(-eDec*goalDist)
# with decay factor eDec = 2; it is currently disabled.
function ContReward(s)
    tip = ccEnd(s)
    goalDist = norm(GoalPos - tip')

    if goalDist < GoalThresh
        return 1000
    end
    return 0
end


Out[13]:
ContReward (generic function with 1 method)

In [12]:
# Transition model: the next state is the current state plus the action.
#   s: current state
#   a: action applied to the state (same shape as s)
# Currently deterministic with a finite action space; it can be made probabilistic
# by sampling the step from a distribution instead.
function ContTrans(s,a)
    nextState = s + a
    return nextState
end


Out[12]:
ContTrans (generic function with 1 method)

In [6]:
# Linear value-function approximation: weighted sum of the basis features of s.
#   w: weight vector, one weight per feature returned by BasisFun
#   s: state, passed unchanged to BasisFun
# Features: EEpos, goalDist, s, const. (currently 9 elements, matching BasisSize)
# TODO: add ObstDist when obstacles are added, and possibly other bases.
function ValApprox(w, s)
    features = BasisFun(s)
    return sum(w .* features)
end


Out[6]:
ValApprox (generic function with 1 method)

In [11]:
# Feature (basis) vector of a state for the linear value approximation.
#   s: joint-angle state; it is transposed before being stacked, so a row is expected
# Stacks, in order: the end-effector position from ccEnd(s), its distance to GoalPos,
# the joint angles themselves, and a constant 1 (bias term).
# (The end-effector position was previously taken as ccQuick(s, 1)[end,:].)
function BasisFun(s)
    tip = ccEnd(s)
    dist = norm(GoalPos' - tip)
    return [tip'; dist; s'; 1]
end


Out[11]:
BasisFun (generic function with 1 method)

In [45]:
# Fitted value iteration over a fixed set of sampled states.
# Each iteration:
#   1. for every sampled state, back up q(s,a) = R(s) + γ*V_w(s') over all actions,
#   2. use the maximum over actions as the regression target y[i],
#   3. refit the weights w by least squares (pseudoinverse of the feature matrix),
#   4. print the norm of the weight change and the elapsed time.
m = 1000 # number of state samples
w = zeros(BasisSize) #initialize weights
actionDims = (3,3,3,3,3) # each of the 5 segments takes a step in {-1, 0, 1}
aSize = prod(actionDims) #number of possible actions (3^5 = 243)
kMax = 1 # number of samples of probabilistic transition - currently 1 because deterministic transition
γ = 0.95 # discount factor applied to the value of the next state
y = zeros(m) # regression targets, one per sampled state
action = [0 0 0 0 0]

stateMat = zeros(m,5)
for i = 1:m # set up this way so can change state initialization based on trajectory following
    stateMat[i, :] = [rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90)]
end

# The sampled states never change between iterations, so the feature matrix and its
# pseudoinverse are computed once instead of once per iteration.
A = zeros(m,BasisSize)
for i = 1:m
    A[i,:] = BasisFun(stateMat[i,:])
end
Apinv = pinv(A)

for iters = 1:50
    tic()

    for i = 1:m

        q = zeros(aSize)
        state = stateMat[i,:]
        reward = ContReward(state) # depends only on the state, not on the action or sample

        for j = 1:aSize
            # decode the linear action index into per-segment values 1..3, then shift to -1..1
            action[1],action[2],action[3],action[4],action[5] = ind2sub(actionDims,j)
            action -=2
            for k = 1:kMax
                q[j] += (reward + γ*ValApprox(w,ContTrans(state,action)))/kMax
            end
        end

        y[i] = maximum(q)

    end

    wp = Apinv*y
    println(norm(wp - w))
    w = wp
    toc()
end


206.52316860016202
elapsed time: 24.655052055 seconds
198.3346356142814
elapsed time: 23.537598634 seconds
190.33240602571894
elapsed time: 24.171635954 seconds
182.51461690562095
elapsed time: 23.976995527 seconds
174.87741717258925
elapsed time: 23.641832058 seconds
167.4204338274379
elapsed time: 25.645678557 seconds
160.14162090233222
elapsed time: 25.713885637 seconds
153.0443549934971
elapsed time: 24.619241686 seconds
146.1250186583152
elapsed time: 23.316657883 seconds
139.3861128602173
elapsed time: 23.827548339 seconds
132.82833308612015
elapsed time: 24.820920045 seconds
126.44825157708414
elapsed time: 23.383172862 seconds
120.24653853631006
elapsed time: 24.203330301 seconds
114.23159898350931
elapsed time: 23.790283262 seconds
108.3995556251788
elapsed time: 25.46439869 seconds
102.74452119850514
elapsed time: 25.490237613 seconds
97.27950770368244
elapsed time: 24.237565478 seconds
92.00778690522644
elapsed time: 24.231406172 seconds
86.9250501138464
elapsed time: 23.600611156 seconds
82.01907829382384
elapsed time: 28.127692605 seconds
77.2919213586003
elapsed time: 26.87235648 seconds
72.7449031369804
elapsed time: 25.765987401 seconds
68.37113716204023
elapsed time: 28.305935879 seconds
64.16695957012139
elapsed time: 26.164162947 seconds
60.131982384772286
elapsed time: 25.076564068 seconds
56.25737617602003
elapsed time: 23.952767047 seconds
52.54502975578783
elapsed time: 25.195247745 seconds
48.991079852997835
elapsed time: 24.977660855 seconds
45.604200628433254
elapsed time: 24.520528121 seconds
42.37277189047299
elapsed time: 26.936869227 seconds
39.29265341286455
elapsed time: 26.690897676 seconds
36.36255068120864
elapsed time: 24.619562079 seconds
33.576615088334485
elapsed time: 24.252912652 seconds
30.936147544984852
elapsed time: 24.615993135 seconds
28.436571019292128
elapsed time: 28.059176733 seconds
26.069051986606585
elapsed time: 27.927907152 seconds
23.832245063786363
elapsed time: 29.014634098 seconds
21.725724050987594
elapsed time: 24.891039666 seconds
19.746896818594376
elapsed time: 25.714167209 seconds
17.889641309583556
elapsed time: 24.876616187 seconds
16.154006521633157
elapsed time: 24.521053333 seconds
14.534420103783981
elapsed time: 24.767195345 seconds
13.028917814484974
elapsed time: 24.78733413 seconds
11.634193670003304
elapsed time: 24.129289214 seconds
10.351745749184579
elapsed time: 24.701926685 seconds
9.178649514664231
elapsed time: 26.030615364 seconds
8.113989710584072
elapsed time: 26.644839199 seconds
7.158808492944351
elapsed time: 26.104633692 seconds
6.3144640684388245
elapsed time: 23.458310334 seconds
5.584922586273079
elapsed time: 23.092817533 seconds

In [61]:
# Display the weights learned by the value-iteration loop
w


Out[61]:
9-element Array{Float64,1}:
 -163.481   
  -15.9706  
 -587.646   
   -1.62859 
   -0.671168
   -1.36586 
    0.459319
   -2.76655 
 3678.1     

In [59]:
# Greedy rollout: start from a random state and, at every step, apply the action that
# maximizes R(s) + γ*V_w(s') under the learned weights w. Every visited state is
# recorded as a row of traj. Relies on aSize, action, γ and w from the training cell.
s = [rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90) rand(-90:90)]
a = zeros(1,5)
nsteps=2000
traj = zeros(nsteps,5)
for i = 1:nsteps

    traj[i,:] = s

    reward = ContReward(s) # same for every candidate action, so evaluate it once per step
    q = zeros(aSize)
    for j = 1:aSize # loop bound matches the size of q
        # decode the linear action index into per-segment values 1..3, then shift to -1..1
        action[1],action[2],action[3],action[4],action[5] = ind2sub((3,3,3,3,3),j)
        action -=2

        q[j] += (reward + γ*ValApprox(w,ContTrans(s,action)))
    end

    # decode the best-scoring action index (first maximum on ties) and apply it
    a[1],a[2],a[3],a[4],a[5] = ind2sub((3,3,3,3,3),findmax(q)[2])
    a-= 2
    s = ContTrans(s,a)
end

In [60]:
# Draw the arm for the first recorded state of the rollout.
# NOTE(review): ccArm2 returns a pair (p, e) that drawArm consumes; their exact
# meaning is defined in the included files -- confirm there.
(p,e)=ccArm2(traj[1, :])
drawArm(p, e)

# Draw the arm for the last recorded state on the same figure
(p,e)=ccArm2(traj[end, :])
drawArm(p, e)


# Overlay the goal zone, then label the figure and fix the axis ranges
drawZone(goal)
title("start and end")
xlabel("x")
ylabel("y")
xlim(-0, 7)
ylim(-3.5, 3.5)


Out[60]:
(-3.5,3.5)

In [43]:
# Overlay `scale` evenly spaced snapshots of the rollout on one figure.
hold("on")
scale=10
# Step directly through the snapshot indices with an integer stride instead of testing
# a floating-point modulus on every step; the stride is at least 1 so the range stays
# valid when scale exceeds nsteps.
stride = max(div(nsteps, scale), 1)
for i = stride:stride:nsteps
    (p,e)=ccArm2(traj[i, :])
    drawArm(p, e)
    drawZone(goal)
    title("itr: $(i)") # the title ends up showing the last snapshot drawn
    xlabel("x")
    ylabel("y")
    xlim(-0, 7)
    ylim(-3.5, 3.5)
end



In [26]:
using PyPlot
# Plot the second column of p against the first as a black ("k") line.
# NOTE(review): p is whatever the most recent ccArm2 call left in the global -- confirm
# that its first two columns are the x and y coordinates.
plot(p[:,1], p[:,2], "k")


Out[26]:
1-element Array{Any,1}:
 PyObject <matplotlib.lines.Line2D object at 0x11bdd4150>

In [ ]: