In [1]:
# Start worker processes for the parallel experiments below: whenever fewer
# than 4 processes are currently running, 4 additional workers are added.
nprocs() < 4 && addprocs(4)
@everywhere include("../fhagents.jl")
In [2]:
@everywhere include("../fhmdp.jl")
In [3]:
using ProgressMeter
using JLD
# Play a single episode of `environ` with the learning agent `alg`.
#
# The environment is reset with `restart!`, then the agent is repeatedly asked
# for an action via `sample_action(alg, state, timestep)`, the action is applied
# with `step!`, and the resulting transition is reported to the agent through
# `observe!`. The loop ends as soon as `step!` signals termination.
#
# Returns the sum of all rewards collected during the episode.
@everywhere function sample_episode!(alg, environ)
    state = restart!(environ)
    episode_return = 0.
    timestep = 1
    while true
        action = sample_action(alg, state, timestep)
        next_state, reward, done = step!(environ, action)
        episode_return += reward
        observe!(alg, state, action, timestep, reward, next_state)
        # The transition is always reported to the agent before terminating.
        done && break
        state = next_state
        timestep += 1
    end
    return episode_return
end
# Run the algorithm built by `method` on `mdp` for `num_epis` episodes and store
# the results in the JLD file `filename`.
#
# Arguments
#   name         - label of the algorithm, stored in the result file
#   method       - constructor, called as `method(S, A, H, mdp.R, algargs...)`
#   algargs      - tuple of extra constructor arguments, also stored in the file
#   mdp          - environment; queried through `nS`, `nA`, `horizon`,
#                  `expected_return` and `optimal_policy`
#   seed         - only recorded in the result file by this method (it does not
#                  seed the RNG here)
#   num_epis     - number of episodes to sample
#   update_delay - the policy is refreshed on episodes 1, 1 + update_delay,
#                  1 + 2*update_delay, ...
#   filename     - path of the JLD file to write
#   skipexist    - if true and `filename` already exists, nothing is computed
#
# Returns a short status string.
@everywhere function evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename,
                                        skipexist=false)
    if skipexist && isfile(filename)
        return "Result file existed"
    end
    S = nS(mdp)
    A = nA(mdp)
    H = horizon(mdp)
    r = zeros(num_epis)   # sampled return of each episode
    er = zeros(num_epis)  # expected return of the policy in place after each episode
    cer = 0.
    alg = method(S, A, H, mdp.R, algargs...)
    #@ProgressMeter.showprogress 20
    timed = @elapsed for k=1:num_epis
        cr = sample_episode!(alg, mdp)
        # `(k - 1) % update_delay == 0` selects k = 1, 1 + d, 1 + 2d, ... exactly
        # like `k % d == 1` for d > 1, but also works for d == 1 (update after
        # every episode), where `k % 1 == 1` would never be true.
        if (k - 1) % update_delay == 0
            update_policy!(alg)
            cer = expected_return(mdp, (s,t) -> alg.policy[s, t])
        end
        r[k] = cr
        er[k] = cer
    end
    JLD.jldopen(filename, "w", compress=true) do file
        write(file, "returns", r)
        write(file, "optimal_return", expected_return(mdp, optimal_policy(mdp)))
        write(file, "expected_returns", er)
        write(file, "update_delay", update_delay)
        write(file, "seed", seed)
        # Record the code version that produced the results.
        write(file, "gitcommit_id", readstring(`git rev-parse HEAD`))
        write(file, "S", S)
        write(file, "A", A)
        write(file, "H", H)
        #write(file, "mdp", mdp)
        write(file, "name", name)
        write(file, "algargs", algargs)
        write(file, "duration", timed)
        write(file, "now", string(now()))
    end
    return "Results written to file"
end
# Evaluate an algorithm on a random MDP with `S` states, `A` actions and
# horizon `H`.
#
# The RNG is seeded with `seed` before the MDP is generated by `randomMDP`, and
# the actual evaluation is delegated to the `evaluate_algorithm` method that
# takes an MDP. All remaining arguments are passed through unchanged.
#
# `skipexist` is an optional *positional* argument, so it has to be forwarded
# positionally; passing it as a keyword (`skipexist=...`) would not match any
# method and would also discard the value supplied by the caller.
@everywhere function evaluate_algorithm(name, method, algargs, S, A, H, seed, num_epis, update_delay, filename,
                                        skipexist=false)
    srand(seed)
    mdp = randomMDP(S, A, H)
    return evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename, skipexist)
end
# Evaluate an algorithm on the chain MDP `ChainMDP(N)`.
#
# The RNG is seeded with `seed` before the MDP is constructed, and the actual
# evaluation is delegated to the `evaluate_algorithm` method that takes an MDP.
# All remaining arguments are passed through unchanged.
#
# `skipexist` is an optional *positional* argument, so it has to be forwarded
# positionally; passing it as a keyword (`skipexist=...`) would not match any
# method and would also discard the value supplied by the caller.
@everywhere function evaluate_algorithm_chain(name, method, algargs, N, seed, num_epis, update_delay, filename,
                                              skipexist=false)
    srand(seed)
    mdp = ChainMDP(N)
    return evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename, skipexist)
end
In [4]:
# Algorithms to compare. Each entry is a tuple
#   (label, constructor, extra constructor arguments)
# The label is used in the result file name and stored in the file; the extra
# arguments are splatted into the constructor after (S, A, H, R).
methods = [("UBEV", UBEV, (0.1,)),
("UBEV_C.1",UBEV, (0.1, 0.1)),
("UBEV_C.2",UBEV, (0.1, 0.5)),
("OIM", OIM, (0.1, 0.1)),
("MBIE0.1", MBIE, (0.1, 0.1)),
("PSRL", PSRL, (1.,)),
("MoRMax", MoRMax, (0.1, .1, 1e-4)),
("UCFH", UCFH, (0.1, 0.1)),
("UCFH sc.", UCFH, (0.1, 0.1, true, 1e-6)),
("DelayedQL", DelayedQL, (0.1, 0.1, 1/1000)),
("UCRL2", UCRL2, (0.1,)),
("MedianPAC", MedianPAC, (0.1, 0.1, false, false))]
# Number of episodes per run (passed as `num_epis` in the launch cell below).
N = 10_000_000
# Passed as `update_delay` to `evaluate_algorithm` in the launch cell below.
update_delay = 10
# Number of actions and horizon of the random MDPs.
A = 3
H = 10
Out[4]:
In [5]:
# Build one job per (state-space size, seed, algorithm) combination. The
# vectors below are parallel: entry j of each one describes job j.
names = String[]   # algorithm labels
meths = []         # algorithm constructors
aargs = []         # extra constructor arguments (tuples of varying shape)
Ss = Int[]         # number of states of the random MDP
seeds = Int[]      # RNG seeds
fns = String[]     # result file paths
SS = [5, 50, 200] # [5, 10, 20, 30, 50, 100, 200]
seedss = [3]
for S in SS, seed in seedss
    for (name, method, algargs) in methods
        push!(names, name)
        push!(meths, method)
        push!(aargs, algargs)
        push!(Ss, S)
        push!(seeds, seed)
        push!(fns, "../results/randMDP_S$(S)_A$(A)_H$(H)_$(name)_$(seed).jld")
    end
end
# Number of jobs. Taken from the collected jobs rather than from a hand-kept
# counter, so the repeated arguments have exactly the same length as the lists.
i = length(fns)
# Run all jobs on the worker processes; the final `true` is `skipexist`, i.e.
# jobs whose result file already exists are meant to be skipped.
pmap(evaluate_algorithm, names, meths, aargs, Ss, repeated(A, i), repeated(H, i), seeds, repeated(N, i),
     repeated(update_delay, i), fns, repeated(true, i))
Out[5]:
In [ ]: