In [2]:
# add worker processes for the parallel reward estimation below
# addprocs(30);
addprocs(7);

In [3]:
using POMDPModels
using POMCP
using POMDPs
using POMDPToolbox

@everywhere begin
    using POMDPModels
    using POMCP
    using POMDPs
    using POMDPToolbox
    import POMCP.init_V
    import POMDPs: action, updater
end

In [4]:
# N = 100000;
N = 100;  # number of Monte Carlo simulations per reward estimate
problem = BabyPOMDP(-5, -10);  # crying baby problem instance

In [5]:
# Estimate the mean discounted reward of `policy` on `problem` by averaging
# N rollout simulations, each run with its own seeded RNG for reproducibility.
function est_reward(problem, policy, belief, N; eps=0.01)
    total = @parallel (+) for i in 1:N
        sim_rng = MersenneTwister(i)
        sim = POMDPToolbox.RolloutSimulator(rng=sim_rng, initial_state=false, eps=eps)
        up = updater(policy)
        POMDPs.simulate(sim, problem, policy, up, initialize_belief(up, belief))
    end
    return total/N
end


Out[5]:
est_reward (generic function with 1 method)

In [6]:
@time est_reward(problem, FeedWhenCrying(), false, N)


  2.339915 seconds (518.67 k allocations: 21.867 MB, 0.30% gc time)
Out[6]:
-15.061634270536405

This is better than in the crying babies test because epsilon is large and, more importantly, the policy receives a non-crying observation on the first step every time.
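
For context, FeedWhenCrying is the simple heuristic baseline from POMDPModels: feed exactly when the most recent observation was crying. A minimal sketch of that rule, written in the same style as the OptBabyPolicy defined further below (the type and argument names here are illustrative, not the POMDPModels implementation, and the "belief" is assumed to be just the previous observation):

type FeedWhenCryingSketch <: POMDPs.Policy end
# feed (action = true) exactly when the latest observation indicates crying
action(::FeedWhenCryingSketch, last_obs_was_crying::Bool, a=false) = last_obs_was_crying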


In [7]:
# Random
pol_rng = MersenneTwister(7)
@time est_reward(problem, RandomPolicy(problem, rng=pol_rng), BoolDistribution(0.5), N)


  0.665952 seconds (142.63 k allocations: 5.912 MB, 0.98% gc time)
Out[7]:
-31.33487686696136

In [8]:
# POMCP with FWC rollout policy
rng = MersenneTwister(3)

solver = POMCPSolver(estimate_value=RolloutEstimator(FeedWhenCrying()), # leaf values estimated by FWC rollouts
                    eps=0.01,          # stop a rollout/search path once the accumulated discount falls below eps
                    c=10.0,            # UCB exploration constant
                    tree_queries=300,  # number of tree search iterations per action selection
                    rng=rng,
                    node_belief_updater=updater(problem)) # updater used to maintain beliefs at tree nodes

policy = solve(solver, problem)

@time est_reward(problem, policy, BoolDistribution(0.0), N)


 41.543228 seconds (88.67 k allocations: 3.776 MB)

In [9]:
# POMCP with Random rollout policy
rng = MersenneTwister(2)
rollout_pol_rng = MersenneTwister(2)

solver = POMCPSolver(estimate_value=RolloutEstimator(RandomPolicy(problem, rng=rollout_pol_rng)),
                     eps=0.01,
                     c=10.0,
                     tree_queries=300, 
                     rng=rng,
                     node_belief_updater=updater(problem))

policy = solve(solver, problem)

@time est_reward(problem, policy, BoolDistribution(0.0), N)


 45.274323 seconds (4.64 k allocations: 343.500 KB)
Out[9]:
-15.919206742702293

In [10]:
# Optimal policy for these particular problem parameters:
# if the belief that the baby is hungry is over .28206, then feed (see DMU book)
@everywhere begin
    type OptBabyPolicy <: POMDPs.Policy end
    # feed (action = true) exactly when P(hungry) exceeds the threshold
    function action(p::OptBabyPolicy, b::BoolDistribution, a=false)
        return b.p > 0.28206
    end
    updater(::OptBabyPolicy) = updater(BabyPOMDP(-5,-10))
end
@time est_reward(problem, OptBabyPolicy(), BoolDistribution(0.0), N)


  0.318258 seconds (26.70 k allocations: 1.144 MB)
Out[10]:
-14.686851194997828

In [11]:
# POMCPDPW with FWC rollout policy
rng = MersenneTwister(2)

solver = POMCPDPWSolver(estimate_value=RolloutEstimator(FeedWhenCrying()),
                    eps=0.01,
                    c=10.0,
                    tree_queries=300, 
                    rng=rng,
                    node_belief_updater=updater(problem))
                    

policy = solve(solver, problem)

@time est_reward(problem, policy, BoolDistribution(0.0), N)


 22.876641 seconds (53.25 k allocations: 2.363 MB)
Out[11]:
-14.706134113641657
