In [1]:
using POMCP
using POMDPs
using POMDPModels # for the crying baby problem model
using POMDPToolbox # for the rollout simulator
In [2]:
# Create the crying-baby problem and the initial belief.
#
# `problem` is the model handed to `solve` and `simulate` further down;
# `dist` is passed to `simulate` as its final argument (the starting belief).
#
# NOTE(review): the two BabyPOMDP arguments look like reward values and the
# BoolDistribution argument looks like a probability -- confirm both against
# the POMDPModels definitions before changing them.
problem = BabyPOMDP(-5, -10)
dist = BoolDistribution(0.0)
Out[2]:
In [3]:
# Configure the POMCP solver. Value estimates come from rollouts that follow
# the FeedWhenCrying policy shipped with POMDPModels.
#
# No updater argument is supplied, so the solver uses its default particle
# filter, as described in the paper.
#
# The estimator and the RNG are built first (in the same order as they
# appear in the keyword list) and kept local to the `let` so that no extra
# globals leak into the notebook namespace.
solver = let estimator = RolloutEstimator(FeedWhenCrying()),
             planner_rng = MersenneTwister(1)
    POMCPSolver(; estimate_value=estimator,
                eps=0.01,
                c=10.0,
                tree_queries=300,
                rng=planner_rng)
end

# Solve for the planning policy, then obtain the belief updater that goes
# with it. The updater assignment stays last so it remains the cell's value.
policy = solve(solver, problem)
up = updater(policy)
Out[3]:
In [4]:
# Build the outer-loop simulator that drives the whole episode.
# Despite its name, RolloutSimulator is unrelated to the rollouts POMCP
# performs internally while planning.
# The trailing semicolon keeps the notebook from echoing the object.
sim = RolloutSimulator(; rng=MersenneTwister(14), initial_state=false, eps=0.01);
In [5]:
# Run one simulation of `problem` under `policy`, using `up` as the belief
# updater and `dist` as the initial belief; keep the returned reward.
sim_reward = simulate(sim, problem, policy, up, dist)
Out[5]: