In [4]:
    
using PyPlot
addprocs(4)
@everywhere using DataFrames
#using ProgressMeter
    
    
In [5]:
    
@everywhere actions = [:north, :south, :east, :west];
@everywhere type Maze_t
    maze::Matrix{Char}
    sensor_states::Vector{ASCIIString}
    Vw::Matrix{Float64}
    Qw::Array{Float64,3}
    Qs::Matrix{Float64}
    n::Matrix{Float64}
    C::Matrix{Float64}
    numbers::Matrix{Int64}
    state_reward_function::Dict{Char, Float64}
    belief::Vector{Float64}
end
@everywhere type Result
    maze::Maze_t
    values::Vector{Float64}
end
    
In [6]:
    
@everywhere function get_action_index(symbol::Symbol)
    r = findin(actions, [symbol])
    if length(r) != 1
        return -1
        end
    return r[1]
end
@everywhere function get_sensor_state(maze::Matrix{Char}, i::Int64, j::Int64)
    foldl(string,[maze[i,j], maze[i-1,j+1], maze[i-1,j+1], maze[i, j+1], maze[i+1, j+1], maze[i+1, j], maze[i+1, j-1], maze[i,j-1], maze[i-1,j-1]])
end
@everywhere function get_unique_sensor_states(maze::Matrix{Char})
    sensor_states = []
    for i=2:size(maze)[1] - 1
        for j=2:size(maze)[2] - 1
            if maze[i,j] != '#'
                sensor_state = get_sensor_state(maze, i, j)
                if sensor_state in sensor_states == false
                    sensor_states = [sensor_states, sensor_state]
                end
            end
        end
    end
    sensor_states
end
@everywhere function get_sensor_index(maze::Matrix{Char}, i::Int64, j::Int64, sensor_states::Vector{ASCIIString})
    sensor_state = get_sensor_state(maze, i, j)
    lst = findin(sensor_states, [sensor_state])
    if length(lst) >= 1 && lst[1] > 0
        return lst[1]
    else
        return -1
    end
end
@everywhere function get_state_coordinate(maze::Matrix{Char}, char::Char)
    for i=1:size(maze)[1]
        for j = 1:size(maze)[2]
            if maze[i,j] == char
                return i,j
            end
        end
    end
    return -1, -1
end
@everywhere function convert_string_to_string_array(s::String)
    index = 1
    while s[index] == ' ' || s[index] == '\n'
        index = index + 1
    end
    maze = nothing
    row = []
    for i=index:length(s)
        if s[i] == '\n'
            if maze == nothing
                maze = row
            else
                maze = hcat(maze, row)
            end
            row = []
        else
            row = [row, s[i]]
        end
    end
    return maze'
end
@everywhere function create_experiment_setup(definition::String, state_reward_function::Dict{Char,Float64})
    maze          = convert_string_to_string_array(definition)
    sensor_states = get_unique_sensor_states(maze)
    Vw            = zeros(size(maze))
    Qw            = zeros(size(maze)[1], size(maze)[2], 4)
    Qs            = zeros(length(sensor_states),4)
    n             = zeros(size(maze))
    C             = zeros(size(maze))
    numbered_maze = int64(zeros(size(maze)))
    belief        = zeros(length(sensor_states))
    for i = 2:size(maze)[1]-1
        for j = 2:size(maze)[2]-1
            index = get_sensor_index(maze, i, j, sensor_states)
            if index > 0
                belief[index] = belief[index] + 1.0
                numbered_maze[i,j] = index
            end
        end
    end
    belief = [1.0/v for v in belief]
    return Maze_t(maze, sensor_states, Vw, Qw, Qs, n, C, numbered_maze, state_reward_function, belief)
end
    
In [53]:
    
@everywhere function plot_data(maze::Maze_t)
    s = (size(maze.maze)[2], size(maze.maze)[1] .* 0.3)
    clf()
    figure(figsize=s)
    subplot(121)
    xticks([])
    yticks([])
    colorbar(imshow(maze.Qs, interpolation="none"))
    title("Q(s,a)")
    
    subplot(122)
    colorbar(imshow(maze.Vw, interpolation="none"))
    xticks([])
    yticks([])
    clim([minimum([0, minimum(maze.Vw)]),1000]) # 1000 is the maximum = V('T')    
    title("V(w)")
end
@everywhere function write_plot_data(maze::Maze_t, filename::String)
    s = (size(maze.maze)[2], size(maze.maze)[1] .* 0.3)
    clf()
    figure(figsize=s)
    subplot(121)
    xticks([])
    yticks([])
    colorbar(imshow(maze.Qs, interpolation="none"))
    title("Q(s,a)")
    
    subplot(122)
    colorbar(imshow(maze.Vw, interpolation="none"))
    xticks([])
    yticks([])
    title("V(w)")
    clim([minimum([0, minimum(maze.Vw)]),1000]) # 1000 is the maximum = V('T') 
    savefig(filename)
end
    
In [54]:
    
@everywhere function policy_iteration!(maze::Maze_t, k::Int64)
    # Q(w,a) = \sum_{w'} p(w'|a) V(w'), where p(w'|a) is a Dirac measure
    for i = 2:size(maze.Vw)[1]-1
        for j = 2:size(maze.Vw)[2] - 1
            maze.Qw[i,j,1] = maze.Vw[i-1, j] - 1.0
            maze.Qw[i,j,2] = maze.Vw[i+1, j] - 1.0
            maze.Qw[i,j,3] = maze.Vw[i,   j+1] - 1.0
            maze.Qw[i,j,4] = maze.Vw[i,   j-1] - 1.0
        end
    end
    
    for s = 1:length(maze.sensor_states)
        for a = 1:4
            maze.Qs[s,a] = 0.0
        end
    end
    
    for i = 2:size(maze.maze)[1] - 1
        for j = 2:size(maze.maze)[2] - 1
            index = get_sensor_index(maze.maze, i, j, maze.sensor_states)
            if index > 0
                for a = 1:4
                    maze.Qs[index, a] = maze.Qs[index, a] + maze.belief[index] * maze.Qw[i,j,a]
                end
            end
        end
    end
    
    for s = 1:length(maze.sensor_states)
        values = DataFrame(VALUES=[v for v in maze.Qs[s,:]], ACTION=actions[1:4], INDICES=[1:4])
        sort!(values, cols = (:VALUES), rev=true)
        if sum(abs(values[:,1])) < 0.001 # randomise action if no values have been aquired so far
            r = randperm(4)
            values[:ACTION]=actions[r]
            values[:INDICES] = r
        end
        values = values[1:minimum([k, size(values)[1]]),:]
        v = values[:,1]
        if sum(v) < 0.1
            v = [1.0/float64(length(v)) for x=1:length(v)]
        else
            rw = sum(v)
            v  = [w / rw for w in v]
        end
        values[:,1] = v
        
        for a = 1:4
            maze.Qs[s,a] = 0.0
        end
        
        for a = 1:size(values)[1]
            maze.Qs[s,values[a,3]] = values[a,1]
        end
    end
   # plot_data(maze, "after policy iteration")
end
    
In [55]:
    
@everywhere function policy_evaluation!(maze::Maze_t, T::Int64, R::Int64)
    for i = 1:R
        # randomise start position
        x = 1 + int64(ceil(rand() * size(maze.maze)[1])) - 1
        y = 1 + int64(ceil(rand() * size(maze.maze)[2])) - 1
    
        while maze.maze[x,y] == '#'
            x = 1 + int64(ceil(rand() * size(maze.maze)[1])) - 1
            y = 1 + int64(ceil(rand() * size(maze.maze)[2])) - 1
        end
    
        init_x = x
        init_y = y
        
        if maze.maze[init_x,init_y] in keys(maze.state_reward_function)
            # we are done, if we are on a reward field
            maze.Vw[init_x,init_y] = maze.state_reward_function[maze.maze[init_x,init_y]]
            break
        end
        for i=1:T
            s = get_sensor_index(maze.maze, x, y, maze.sensor_states)
            probabilities = maze.Qs[s,:]
            p = rand()            
            action = actions[end]
            for j = 1:4
                if p < sum(probabilities[1:j])
                    action = actions[j]
                    break
                end
            end
            if action == :north
                if maze.maze[x - 1,y] == 'B'
                    x, y = get_state_coordinate(maze.maze, 'S')
                elseif maze.maze[x - 1,y] != '#'
                    x = x - 1
                end
            elseif action == :south
                if maze.maze[x + 1,y] == 'B'
                    x, y = get_state_coordinate(maze.maze, 'S')
                elseif maze.maze[x + 1,y] != '#'
                    x = x + 1
                end
            elseif action == :east
                if maze.maze[x,y + 1] == 'B'
                    x, y = get_state_coordinate(maze.maze, 'S')
                elseif maze.maze[x,y + 1] != '#'
                    y = y + 1
                end
            elseif action == :west
                if maze.maze[x,y - 1] == 'B'
                    x, y = get_state_coordinate(maze.maze, 'S')
                elseif maze.maze[x,y - 1] != '#'
                    y = y - 1
                end
            end
            
            maze.C[init_x, init_y] = maze.C[init_x, init_y] - 1.0
            if maze.maze[x,y] in keys(maze.state_reward_function)
                maze.C[init_x, init_y] = maze.C[init_x, init_y] + maze.state_reward_function[maze.maze[x,y]]
                break # done. we found a reward
            end
        end
        maze.n[init_x, init_y]  = maze.n[init_x, init_y] + 1.0
        maze.Vw[init_x, init_y] = maze.C[init_x, init_y] / maze.n[init_x, init_y]
    end
   # plot_data(maze, "after policy evaluation\n")
end
    
In [58]:
    
maze_definition = "
#########
###T   B#
##### ###
#B    ###
### #####
###S#####
#########
"
function run_experiment(k::Int64, trials::Int64, iterations::Int64, T::Int64, N::Int64, maze_string::String)
#    pm = Progress(trials * epoch_length, 1)
    results = @parallel (hcat) for i = 1:trials
#        println("Working on trial $i")
        maze = create_experiment_setup(maze_string, ['T' => 1000.0])
        v = zeros(iterations)
        x,y = get_state_coordinate(maze.maze, 'S')
       
        for j = 1:iterations
            policy_evaluation!(maze, T, N)
            policy_iteration!(maze, k)
            v[j] = maze.Vw[x,y]
#            next!(pm)
        end
#        values = values .+ v
        Result(maze, v)
    end
    mazes  = map(r->r.maze,   results)
    values = map(r->r.values, results)
    return mazes, mean(values)
end
    
    Out[58]:
In [59]:
    
trials = 100
iterations = 2000
initial_positions = 10
mc_path_length = 10
;
    
In [60]:
    
m4, v4 = run_experiment(4, trials, iterations, mc_path_length, initial_positions, maze_definition)
plot(v4)
plot_data(m4[1])
writecsv("4.csv", v4)
f = open("4.dat", "w")
serialize(f, m4)
close(f)
m4[1].Vw
    
    
    
    Out[60]:
In [61]:
    
m3, v3 = run_experiment(3, trials, iterations, mc_path_length, initial_positions, maze_definition)
plot(v3)
plot_data(m3[1])
writecsv("3.csv", v3)
f = open("3.dat", "w")
serialize(f, m3)
close(f)
m3[1].Vw
    
    
    
    Out[61]:
In [62]:
    
m2, v2 = run_experiment(2, trials, iterations, mc_path_length, initial_positions, maze_definition)
plot(v2)
plot_data(m2[1])
writecsv("2.csv", v2)
f = open("2.dat", "w")
serialize(f, m2)
close(f)
m2[1].Vw
    
    
    
    Out[62]:
In [63]:
    
m1, v1 = run_experiment(1, trials, iterations, mc_path_length, initial_positions, maze_definition)
plot(v1)
plot_data(m1[1])
writecsv("1.csv", v1)
f = open("1.dat", "w")
serialize(f, m1)
close(f)
m1[1].Vw
    
    
    
    Out[63]:
In [68]:
    
figure(figsize=(7,4))
p4=plot(v4)
p3=plot(v3)
p2=plot(v2)
p1=plot(v1)
ylim([-11, 80])
legend( [p4,p3,p2,p1], ["k=4","k=3","k=2","k=1"], loc=4)
savefig("/Users/zahedi/Dropbox/POMDPs/plots/plot.png")
    
    
In [72]:
    
writecsv("/Users/zahedi/Dropbox/POMDPs/plots/1.csv",v1)
writecsv("/Users/zahedi/Dropbox/POMDPs/plots/2.csv",v2)
writecsv("/Users/zahedi/Dropbox/POMDPs/plots/3.csv",v3)
writecsv("/Users/zahedi/Dropbox/POMDPs/plots/4.csv",v4)
    
In [70]:
    
for i = 1:length(m1)
    write_plot_data(m1[i], "/Users/zahedi/Dropbox/POMDPs/plots/V1_$i.png")
    write_plot_data(m2[i], "/Users/zahedi/Dropbox/POMDPs/plots/V2_$i.png")
    write_plot_data(m3[i], "/Users/zahedi/Dropbox/POMDPs/plots/V3_$i.png")
    write_plot_data(m4[i], "/Users/zahedi/Dropbox/POMDPs/plots/V4_$i.png")
end
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [71]:
    
plot_data(m4[1])
    
    
    
    Out[71]:
In [ ]: