This is designed to be a minimal example to get POMCP running. The problem is a one-dimensional light-dark problem. The goal is to be near 0. Observations are noisy measurements of position.
-3-2-1 0 1 2 3
...| | | | | | | | ...
G S
Here G is the goal. S is the starting location
In [1]:
using POMDPs
import POMDPs: discount, isterminal, actions, initial_state_distribution, create_state, reward
import POMDPs: generate_s, generate_o
In [2]:
"""
Mutable state for the 1-D light-dark problem.

- `status`: 0 while the problem is active; set to a negative value by the
  "enter" action (see `generate_s`), which `isterminal` treats as terminal.
- `y`: the agent's position on the real line.
"""
type LightDark1DState
status::Int64
y::Float64
LightDark1DState() = new()          # uninitialized placeholder state
LightDark1DState(x, y) = new(x, y)  # fully-specified state
end
# Two states are equal iff both the status flag and the position match.
function Base.:(==)(a::LightDark1DState, b::LightDark1DState)
    return a.status == b.status && a.y == b.y
end
# Hash consistent with `==` above: folds both fields into the seed `h`.
function Base.hash(s::LightDark1DState, h::UInt64=zero(UInt64))
    inner = hash(s.y, h)
    return hash(s.status, inner)
end
In [3]:
"""
POMDP definition for the 1-D light-dark problem.

Type parameters: state `LightDark1DState`, action `Int64`, observation `Float64`.
"""
type LightDark1D <: POMDPs.POMDP{LightDark1DState,Int64,Float64}
discount_factor::Float64  # per-step discount, returned by `discount`
correct_r::Float64        # reward for "enter" within 1 unit of the goal at 0
incorrect_r::Float64      # reward for "enter" elsewhere
step_size::Float64        # NOTE(review): not used by the visible generate_s — confirm intent
movement_cost::Float64    # NOTE(review): not referenced in the visible reward — confirm intent
end
# Default instance: discount 0.9, enter rewards ±10, unit step, free movement.
function LightDark1D()
    return LightDark1D(0.9, 10, -10, 1, 0)
end
# Discount factor used by the planner.
function discount(prob::LightDark1D)
    return prob.discount_factor
end
# A state is terminal once its status flag has gone negative.
function isterminal(::LightDark1D, state::LightDark1DState)
    return state.status < 0
end
# Allocate a fresh active state at the origin.
function create_state(prob::LightDark1D)
    return LightDark1DState(0, 0)
end
In [4]:
# Observation-noise standard deviation: smallest (1e-2) at y == 5 — the
# "light" region — and growing linearly with distance from 5.
# Widened from `::Float64` to `::Real` so it composes with any numeric
# position type; behavior for Float64 inputs is unchanged.
sigma(x::Real) = abs(x - 5) / sqrt(2) + 1e-2
# Sample a noisy position observation: Gaussian with mean sp.y and
# state-dependent standard deviation sigma(sp.y).
function generate_o(p::LightDark1D, s, a, sp::LightDark1DState, rng::AbstractRNG, o::Float64=0.0)
    noise = Base.randn(rng) * sigma(sp.y)
    return sp.y + noise
end
"""
    generate_s(p, s, a, rng, sp=create_state(p))

Deterministic transition: action 0 ("enter") flags the state terminal
(status -1); any other action moves the agent by `a`. Terminal states
are absorbing.
"""
function generate_s(p::LightDark1D, s::LightDark1DState, a::Int64, rng::AbstractRNG,
                    sp::LightDark1DState=create_state(p))
    if s.status < 0 # Terminal state: absorbing
        # BUG FIX: the original returned `copy(s)`, but no `Base.copy`
        # method is defined for LightDark1DState, so reaching this branch
        # raised a MethodError. Construct an equivalent state instead.
        return LightDark1DState(s.status, s.y)
    end
    if a == 0 # Enter: terminate with position unchanged
        return LightDark1DState(-1, s.y)
    else      # Move left/right by a
        return LightDark1DState(s.status, s.y + a)
    end
end
# Reward model: only the "enter" action (a == 0) taken from a non-terminal
# state yields a nonzero reward — `correct_r` when within 1 unit of the goal
# at 0, `incorrect_r` otherwise. Movement and terminal states cost nothing.
function reward(p::LightDark1D, s::LightDark1DState, a::Int, sp::LightDark1DState)
    if s.status < 0 || a != 0
        return 0.0
    end
    return abs(s.y) < 1 ? p.correct_r : p.incorrect_r
end
In [5]:
"""
Discrete action space: wraps the vector of legal integer moves.
"""
type LightDark1DActionSpace
actions::Vector{Int64}
end
# Legal actions: step left (-1), stop/enter (0), step right (+1).
function actions(::LightDark1D)
    return LightDark1DActionSpace([-1, 0, 1])
end
# Draw a uniformly random action from the space.
function Base.rand(rng::AbstractRNG, asp::LightDark1DActionSpace, a::Int64=1)
    return rand(rng, asp.actions)
end
"""
Gaussian distribution over the agent's initial position `y`
(sampled states always have status 0 — see `Base.rand` below).
"""
type LDNormalStateDist
mean::Float64
std::Float64
end
# Sample an initial state: active (status 0), position ~ Normal(mean, std).
function Base.rand(rng::AbstractRNG, d::LDNormalStateDist, sample::LightDark1DState=LightDark1DState())
    y = d.mean + randn(rng) * d.std
    return LightDark1DState(0, y)
end
# Initial belief: the starting position is drawn from Normal(mean=2, std=3).
initial_state_distribution(pomdp::LightDark1D) = LDNormalStateDist(2, 3)
In [6]:
using POMCP
using POMDPToolbox
In [7]:
# Construct the POMCP solver (DPW variant, suited to the continuous
# observation space), the problem instance, and solve for a policy.
# The trailing ';' suppresses notebook output.
solver = POMCPDPWSolver()
pomdp = LightDark1D()
policy = solve(solver, pomdp);
In [8]:
# Deliberate failure demo: simulating with the default belief updater is
# expected to error on this continuous-observation problem (explained in the
# text below), so catch the exception and show its message.
try
simulate(RolloutSimulator(), pomdp, policy)
catch ex
print(ex.msg)  # NOTE(review): assumes the thrown exception has a `msg` field — confirm
end
Since the problem is continuous, the probability of POMCP having simulated the observation that is actually observed is very small, so the default particle filter will fail. Instead, we will use the SIR particle filter from POMDPToolbox. For more information on other options for belief updates see this notebook.
In order for the particle filter to re-weight the particles, we need to define the observation distribution.
In [9]:
import POMDPs: observation, pdf
"""
Minimal immutable normal distribution (mean, std), used as the return type
of `observation` so the particle filter can re-weight particles via `pdf`.
"""
immutable NormalDist # note distributions.jl does not support random number generators
mean::Float64
std::Float64
end
# Gaussian density N(d.mean, d.std) evaluated at x.
function pdf(d::NormalDist, x::Float64)
    numerator = exp(-(x - d.mean)^2 / (2.0 * d.std^2))
    return numerator / (d.std * sqrt(2.0 * pi))
end
# Observation model: a Gaussian centered at the true next position sp.y with
# state-dependent noise sigma(sp.y) (narrowest near y = 5).
function observation(p::LightDark1D, a::Int, sp::LightDark1DState)
    y = sp.y
    return NormalDist(y, sigma(y))
end
Out[9]:
In [10]:
# Sampling-importance-resampling particle filter with 500 particles; it
# re-weights particles using the `observation` distribution defined above.
up = SIRParticleUpdater(pomdp, 500);
simulate(RolloutSimulator(), pomdp, policy, up)
Out[10]:
This doesn't work that well, so we would need to tune parameters.