In [2]:
# addprocs(30);
addprocs(7);   # add worker processes for the parallel reward estimates
In [3]:
using POMDPModels
using POMCP
using POMDPs
using POMDPToolbox
@everywhere begin
    using POMDPModels
    using POMCP
    using POMDPs
    using POMDPToolbox
    import POMCP.init_V
    import POMDPs: action, updater
end
In [4]:
# N = 100000;
N = 100;   # number of Monte Carlo simulations per reward estimate
problem = BabyPOMDP(-5, -10);
In [5]:
function est_reward(problem, policy, belief, N; eps=0.01)
    # Monte Carlo estimate of the mean reward over N parallel rollouts;
    # each trial gets its own RNG seeded with the trial index.
    total = @parallel (+) for i in 1:N
        sim_rng = MersenneTwister(i)
        sim = POMDPToolbox.RolloutSimulator(rng=sim_rng, initial_state=false, eps=eps)
        up = updater(policy)
        POMDPs.simulate(sim, problem, policy, up, initialize_belief(up, belief))
    end
    return total/N
end
Out[5]:
In [6]:
@time est_reward(problem, FeedWhenCrying(), false, N)
Out[6]:
This is better than in the crying-babies test because epsilon is large and, more importantly, because the policy gets a not-crying observation on the first step every time.
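For context, the feed-when-crying heuristic bases its action only on the most recent observation, so it does not feed until it first observes crying. A minimal sketch of that rule (illustrative only; fwc_rule is a made-up name, not the POMDPModels implementation):
# Sketch: feed (true) exactly when the most recent observation was crying (true).
fwc_rule(last_obs_was_crying::Bool) = last_obs_was_crying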
In [7]:
# Random policy baseline
pol_rng = MersenneTwister(7)
@time est_reward(problem, RandomPolicy(problem, rng=pol_rng), BoolDistribution(0.5), N)
Out[7]:
In [8]:
# POMCP with FWC rollout policy
rng = MersenneTwister(3)
solver = POMCPSolver(estimate_value=RolloutEstimator(FeedWhenCrying()),
                     eps=0.01,
                     c=10.0,
                     tree_queries=300,
                     rng=rng,
                     node_belief_updater=updater(problem))
policy = solve(solver, problem)
@time est_reward(problem, policy, BoolDistribution(0.0), N)
In [9]:
# POMCP with Random rollout policy
rng = MersenneTwister(2)
rollout_pol_rng = MersenneTwister(2)
solver = POMCPSolver(estimate_value=RolloutEstimator(RandomPolicy(problem, rng=rollout_pol_rng)),
                     eps=0.01,
                     c=10.0,
                     tree_queries=300,
                     rng=rng,
                     node_belief_updater=updater(problem))
policy = solve(solver, problem)
@time est_reward(problem, policy, BoolDistribution(0.0), N)
Out[9]:
In [10]:
# Optimal policy for these particular problem parameters:
# feed if the belief that the baby is hungry exceeds 0.28206 (see the DMU book)
@everywhere begin
    type OptBabyPolicy <: POMDPs.Policy end
    # Feed (action = true) whenever the belief that the baby is hungry
    # exceeds the 0.28206 threshold.
    function action(p::OptBabyPolicy, b::BoolDistribution, a=false)
        return b.p > 0.28206
    end
    updater(::OptBabyPolicy) = updater(BabyPOMDP(-5,-10))
end
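# Quick sanity check of the threshold rule (illustrative):
@assert action(OptBabyPolicy(), BoolDistribution(0.3)) == true   # above threshold -> feed
@assert action(OptBabyPolicy(), BoolDistribution(0.2)) == false  # below threshold -> don't feed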
@time est_reward(problem, OptBabyPolicy(), BoolDistribution(0.0), N)
Out[10]:
In [11]:
# POMCPDPW with FWC rollout policy
rng = MersenneTwister(2)
solver = POMCPDPWSolver(estimate_value=RolloutEstimator(FeedWhenCrying()),
                        eps=0.01,
                        c=10.0,
                        tree_queries=300,
                        rng=rng,
                        node_belief_updater=updater(problem))
policy = solve(solver, problem)
@time est_reward(problem, policy, BoolDistribution(0.0), N)
Out[11]: