In [1]:
addprocs(4)  # add 4 worker processes for the parallel simulation loop
Out[1]:
In [2]:
using OPCSPs
using MCTS
using POMDPToolbox
using POMDPs
In [3]:
N = 1000  # number of Monte Carlo trials
# shared arrays so the worker processes can write their results directly
naive_rewards = SharedArray(Float64, N)
feedback_rewards = SharedArray(Float64, N)
cheat_rewards = SharedArray(Float64, N)
mcts_rewards = SharedArray(Float64, N);
In [4]:
# 6-node instance: mean prizes, prize covariance, and node positions
r = [0, 10.1, 5, 5, 5, 0]
cov = Float64[ 0  0  0  0  0  0;
               0  0  0  0  0  0;
               0  0  2  2 -2  0;
               0  0  2  4  0  0;
               0  0 -2  0  4  0;
               0  0  0  0  0  0]
positions = Vector{Float64}[[0, 0], [0, -1.71], [0, 1], [1, 1], [-1, 1], [0, 0]];
rng = MersenneTwister(1);  # RNG for drawing the true prize values in each trial
In [5]:
@sync @parallel for j in 1:N
    # construct the problem instance (prizes, positions, covariance, distance limit, start, end)
    p = OPCSP(r, positions, cov, 3.43, 1, 6)
    # draw an initial state from the initial belief
    is = rand!(rng, create_state(p), initial_belief(p))

    # baselines: an omniscient plan ("cheat"), an open-loop plan from the exact OP solver, and a feedback replanner
    cheat_rewards[j] = reward(p, is.d, cheat(p, is.d))
    naive_rewards[j] = reward(p, is.d, solve_op(GurobiExactSolver(), p))
    feedback_rewards[j] = reward(p, is.d, solve_opcsp_feedback(p, is.d))

    # MCTS with double progressive widening on the belief-space MDP
    mdp = OPCSPBeliefMDP(p)
    solver = DPWSolver(rollout_solver=SolveMeanFeedback(mdp, HeuristicSolver()),
                       exploration_constant=sum(p.r),
                       n_iterations=1000,
                       rng=MersenneTwister(j),
                       k_action=5.0,
                       alpha_action=1.0,
                       k_state=10.0,
                       alpha_state=1.0)
    policy = MCTSAdapter(solve(solver, mdp))

    # simulate the MCTS policy, then score the visited path against the true prizes
    sim = HistoryRecorder(rng=MersenneTwister(1), initial_state=is)
    u = OPCSPUpdater(p)
    ib = convert_belief(u, initial_belief(p))
    simulate(sim, p, policy, u, ib)
    path = Int[s.i for s in sim.state_hist]
    mcts_rewards[j] = reward(p, is.d, path)
end
Out[5]:
In [6]:
# average accumulated reward for each strategy
@show mean(cheat_rewards)
@show mean(naive_rewards)
@show mean(feedback_rewards)
@show mean(mcts_rewards);