In [1]:
# Start four additional worker processes unless at least four processes are
# already running, then load the agent definitions on every process.
nprocs() < 4 && addprocs(4)
@everywhere include("../fhagents.jl")

In [2]:
# Load ../fhmdp.jl on the master and on every worker process.
@everywhere include("../fhmdp.jl")


WARNING: replacing module Distributions.
WARNING: replacing module Distributions.
WARNING: replacing module Distributions.
WARNING: replacing module Distributions.

In [3]:
using ProgressMeter
using JLD

# Play a single episode of `environ` with the agent `alg` and return the sum of
# the rewards collected.
#
# The environment is reset with `restart!`, then the agent repeatedly picks an
# action with `sample_action`, the environment advances with `step!`, and the
# agent is told about the transition through `observe!`.  The episode ends as
# soon as `step!` reports its stop flag as true.
@everywhere function sample_episode!(alg, environ)
    state = restart!(environ)
    timestep = 1            # time index within the episode, starts at 1
    episode_return = 0.0
    while true
        action = sample_action(alg, state, timestep)
        next_state, reward, done = step!(environ, action)
        episode_return += reward
        observe!(alg, state, action, timestep, reward, next_state)
        done && break
        state = next_state
        timestep += 1
    end
    return episode_return
end



# Run one algorithm on a given MDP for `num_epis` episodes and write the results
# to the JLD file `filename`.
#
# Arguments
#   name          label stored in the result file
#   method        constructor of the agent; called as
#                 `method(S, A, H, mdp.R, algargs...)`
#   algargs       extra arguments splatted into `method`
#   mdp           environment; must support `nS`, `nA`, `horizon`,
#                 `expected_return`, `optimal_policy` and have a field `R`
#   seed          only recorded in the result file; this method does not seed
#                 the random number generator itself
#   num_epis      number of episodes to run
#   update_delay  the policy is updated (and its expected return re-evaluated)
#                 before episodes 1, 1 + update_delay, 1 + 2 * update_delay, ...
#   filename      path of the JLD file to write
#   skipexist     if true and `filename` already exists, nothing is run
#
# Returns "Result file existed" when the run was skipped and
# "Results written to file" otherwise.
@everywhere function evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename,
                                    skipexist=false)

    if skipexist && isfile(filename)
        return "Result file existed"
    end

    S = nS(mdp)
    A = nA(mdp)
    H = horizon(mdp)
    r = zeros(num_epis)     # sampled return of every episode
    er = zeros(num_epis)    # expected return of the policy in use in every episode
    cer = 0.

    alg = method(S, A, H, mdp.R, algargs...)
    timed = @elapsed for k=1:num_epis
        cr = sample_episode!(alg, mdp)
        # `(k - 1) % update_delay == 0` selects k = 1, 1 + update_delay, ...
        # It matches the former `k % update_delay == 1` for every delay >= 2 and
        # additionally works for update_delay == 1, where the old test was never
        # true and the policy was therefore never updated.
        if (k - 1) % update_delay == 0
            update_policy!(alg)
            cer = expected_return(mdp, (s,t) -> alg.policy[s, t])
        end
        r[k] = cr
        er[k] = cer
    end

    # The MDP itself is not stored; only its dimensions and the seed are.
    JLD.jldopen(filename, "w", compress=true) do file
        write(file, "returns", r)
        write(file, "optimal_return", expected_return(mdp, optimal_policy(mdp)))
        write(file, "expected_returns", er)
        write(file, "update_delay", update_delay)
        write(file, "seed", seed)
        write(file, "gitcommit_id", readstring(`git rev-parse HEAD`))
        write(file, "S", S)
        write(file, "A", A)
        write(file, "H", H)
        write(file, "name", name)
        write(file, "algargs", algargs)
        write(file, "duration", timed)
        write(file, "now", string(now()))
    end
    return "Results written to file"
end

# Convenience method: seed the random number generator with `seed`, draw a
# random MDP via `randomMDP(S, A, H)` and evaluate the algorithm on it.
#
# All remaining arguments, including `skipexist`, are handed on unchanged to the
# method of `evaluate_algorithm` that takes an MDP, and its status string is
# returned.
@everywhere function evaluate_algorithm(name, method, algargs, S, A, H, seed, num_epis, update_delay, filename,
                                    skipexist=false)
    srand(seed)
    mdp = randomMDP(S, A, H)
    # `skipexist` is an optional *positional* argument of the callee, so it has
    # to be forwarded positionally; the former `skipexist=false` keyword both
    # dropped the caller's choice and did not match the callee's signature.
    return evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename, skipexist)
end

# Convenience function: seed the random number generator with `seed`, build
# `ChainMDP(N)` and evaluate the algorithm on it.
#
# All remaining arguments, including `skipexist`, are handed on unchanged to
# `evaluate_algorithm`, and its status string is returned.
@everywhere function evaluate_algorithm_chain(name, method, algargs, N, seed, num_epis, update_delay, filename,
                                    skipexist=false)
    srand(seed)
    mdp = ChainMDP(N)
    # `skipexist` is an optional *positional* argument of the callee, so it has
    # to be forwarded positionally; the former `skipexist=false` keyword both
    # dropped the caller's choice and did not match the callee's signature.
    return evaluate_algorithm(name, method, algargs, mdp, seed, num_epis, update_delay, filename, skipexist)
end

In [4]:
# Algorithms to compare.  Every entry is a tuple
#     (label, agent constructor, extra constructor arguments)
# where the label is also used in the name of the result file.
methods = [
    ("UBEV",      UBEV,      (0.1,)),
    ("UBEV_C.1",  UBEV,      (0.1, 0.1)),
    ("UBEV_C.2",  UBEV,      (0.1, 0.5)),
    ("OIM",       OIM,       (0.1, 0.1)),
    ("MBIE0.1",   MBIE,      (0.1, 0.1)),
    ("PSRL",      PSRL,      (1.0,)),
    ("MoRMax",    MoRMax,    (0.1, 0.1, 1e-4)),
    ("UCFH",      UCFH,      (0.1, 0.1)),
    ("UCFH sc.",  UCFH,      (0.1, 0.1, true, 1e-6)),
    ("DelayedQL", DelayedQL, (0.1, 0.1, 1/1000)),
    ("UCRL2",     UCRL2,     (0.1,)),
    ("MedianPAC", MedianPAC, (0.1, 0.1, false, false)),
]

# Shared experiment settings.
N = 10_000_000       # number of episodes per run
update_delay = 10    # episodes between two policy updates
A = 3                # number of actions of the random MDPs
H = 10               # horizon of the random MDPs


Out[4]:
10

In [5]:
# Build one job per (number of states, seed, algorithm) combination and run all
# jobs in parallel.  The k-th entries of the vectors below together form the
# arguments of the k-th call to `evaluate_algorithm`.
names = []
meths = []
aargs = []
Ss    = []
seeds = []
fns   = []
# Number of jobs queued so far.  It has to start at 0 so that it equals the
# length of the vectors above after the loop; starting at 1 made every
# `repeated(..., i)` below one element longer than the job list.
i = 0
SS = [5, 50, 200] # [5, 10, 20, 30, 50, 100, 200]
seedss = [3]
for S in SS, seed in seedss
    for (name, method, algargs) in methods
        i += 1
        push!(names, name)
        push!(meths, method)
        push!(aargs, algargs)
        push!(Ss, S)
        push!(seeds, seed)
        push!(fns, "../results/randMDP_S$(S)_A$(A)_H$(H)_$(name)_$(seed).jld")
    end
end
# The trailing `true` is the `skipexist` flag: runs whose result file already
# exists are meant to be skipped.
pmap(evaluate_algorithm, names, meths, aargs, Ss, repeated(A, i), repeated(H, i), seeds, repeated(N, i),
repeated(update_delay, i), fns, repeated(true, i))


Out[5]:
36-element Array{Any,1}:
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Results written to file"
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 ⋮                        
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    
 "Results written to file"
 "Result file existed"    
 "Result file existed"    
 "Result file existed"    

In [ ]: