notebook.community

Edit and run



In [1]:

    
using POMCP
using POMDPs
using POMDPModels # for the crying baby problem model
using POMDPToolbox # for the rollout simulator



In [2]:

    
# Create the crying-baby problem model and the initial belief.
# NOTE(review): the meaning of the two BabyPOMDP arguments (-5, -10) is not
# visible here; presumably they are reward parameters. Confirm against
# POMDPModels.
problem = BabyPOMDP(-5, -10)
# Initial belief, passed to `simulate` later on.
# NOTE(review): 0.0 is presumably the probability assigned to `true`;
# confirm against POMDPModels.
dist = BoolDistribution(0.0)









    Out[2]:





POMDPModels.BoolDistribution(0.0)



In [3]:

    
# Configure the POMCP solver. Rollouts use the FeedWhenCrying policy
# from POMDPModels, wrapped in a RolloutEstimator.
#
# No updater argument is given, so the solver falls back to its default
# belief updater (per the original author: the particle filter described
# in the paper).
#
# The solver's random number generator is seeded explicitly with 1.
solver = POMCPSolver(;
    estimate_value = RolloutEstimator(FeedWhenCrying()),
    eps = 0.01,
    c = 10.0,
    tree_queries = 300,
    rng = MersenneTwister(1)
)

# Solve the problem to obtain a policy, then ask the policy for its
# matching belief updater.
policy = solve(solver, problem)
up = updater(policy)









    Out[3]:





POMCP.RootUpdater{POMCP.DeadReinvigorator{Bool}}(POMCP.DeadReinvigorator{Bool}())



In [4]:

    
# Outer-loop simulator used to run the policy on the problem.
# Despite its name, RolloutSimulator has nothing to do with the rollouts
# that POMCP performs internally.
# It gets its own seeded RNG (seed 14) and starts from the state `false`.
# The trailing semicolon suppresses the cell's displayed output.
sim = RolloutSimulator(; rng = MersenneTwister(14), initial_state = false, eps = 0.01);



In [5]:

    
# Run the simulation: the simulator drives `problem` with `policy`, using
# `up` as the belief updater and `dist` as the initial belief. The value
# returned by `simulate` is kept in `sim_reward` and shown as the cell output.
sim_reward = simulate(sim, problem, policy, up, dist)









    Out[5]:





-6.767279837353019