In [1]:
addprocs(4)  # add 4 worker processes for the parallel simulation loop
Out[1]:
In [2]:
using OPCSPs
using MCTS
using POMDPToolbox
using POMDPs
In [3]:
N = 1000  # number of Monte Carlo trials
# shared arrays so the worker processes can write their results directly
naive_rewards = SharedArray(Float64, N)
feedback_rewards = SharedArray(Float64, N)
cheat_rewards = SharedArray(Float64, N)
mcts_rewards = SharedArray(Float64, N);
In [4]:
# 6-node instance: mean prizes, prize covariance, and node positions
r = [0, 10.1, 5, 5, 5, 0]
cov = Float64[ 0  0  0  0  0  0;
               0  0  0  0  0  0;
               0  0  2  2 -2  0;
               0  0  2  4  0  0;
               0  0 -2  0  4  0;
               0  0  0  0  0  0]
positions = Vector{Float64}[[0, 0], [0, -1.71], [0, 1], [1, 1], [-1, 1], [0, 0]];
rng = MersenneTwister(1);  # RNG for drawing the true prize values in each trial
In [5]:
@sync @parallel for j in 1:N
    # construct the problem instance (prizes, positions, covariance, distance limit, start, end)
    p = OPCSP(r, positions, cov, 3.43, 1, 6)
    # draw an initial state from the initial belief
    is = rand!(rng, create_state(p), initial_belief(p))

    # baselines: an omniscient plan ("cheat"), an open-loop plan from the exact OP solver, and a feedback replanner
    cheat_rewards[j] = reward(p, is.d, cheat(p, is.d))
    naive_rewards[j] = reward(p, is.d, solve_op(GurobiExactSolver(), p))
    feedback_rewards[j] = reward(p, is.d, solve_opcsp_feedback(p, is.d))

    # MCTS with double progressive widening on the belief-space MDP
    mdp = OPCSPBeliefMDP(p)
    solver = DPWSolver(rollout_solver=SolveMeanFeedback(mdp, HeuristicSolver()),
                       exploration_constant=sum(p.r),
                       n_iterations=1000,
                       rng=MersenneTwister(j),
                       k_action=5.0,
                       alpha_action=1.0,
                       k_state=10.0,
                       alpha_state=1.0)
    policy = MCTSAdapter(solve(solver, mdp))

    # simulate the MCTS policy, then score the visited path against the true prizes
    sim = HistoryRecorder(rng=MersenneTwister(1), initial_state=is)
    u = OPCSPUpdater(p)
    ib = convert_belief(u, initial_belief(p))
    simulate(sim, p, policy, u, ib)
    path = Int[s.i for s in sim.state_hist]
    mcts_rewards[j] = reward(p, is.d, path)
end
Out[5]:
In [6]:
# average accumulated reward for each strategy
@show mean(cheat_rewards)
@show mean(naive_rewards)
@show mean(feedback_rewards)
@show mean(mcts_rewards);