DiscreteDP Example: Water Management

Shosei Yu

Faculty of Economics, University of Tokyo

From Miranda and Fackler, Applied Computational Economics and Finance, 2002, Section 7.6.5

Julia translation of the Python version


In [1]:
using QuantEcon
using Plots
plotlyjs()


Plotly javascript loaded.

To load again call

init_notebook(true)

Out[1]:
Plots.PlotlyJSBackend()

In [2]:
# The maximum capacity
maxcap = 30

# The number of states
n = maxcap + 1

# The number of actions
m = n

# Constants
a1, b1 = 14, 0.8
a2, b2 = 10, 0.4

# Reward functions
function F(x::Number)
    return a1 * x^b1
end

function G(x::Number)
    return a2 * x^b2
end

# State transition probabilities
probs = [0.1, 0.2, 0.4, 0.2, 0.1]
supp_size = length(probs)

# Discount factor
delta = 0.9


Out[2]:
0.9

Product formulation


In [3]:
# Reward array

R = zeros(Float64, n, m)
for s in 1:n
    for x in 1:m
        
        # Because julian index of array begins with 1
        xv = x - 1
        sv = s - 1
        
        if x <= s
            r = F(xv) + G(sv - xv)
        else
            r = -Inf
        end
        
        R[s, x] = r
        
    end
end

In [4]:
# Transition probability array

Q = zeros(Float64, n, m, n)
for s in 1:n
    for x in 1:m
        
        # Guarding
        if x > s
            continue
        end
        
        for j in 1:supp_size
            Q[s, x, min(s - x + j, n)] += probs[j]
        end
        
    end
end

In [5]:
# Solve the dynamic optimization problem by policy iteration

res = solve(DiscreteDP(R, Q, delta), PFI)


Out[5]:
QuantEcon.DPSolveResult{QuantEcon.PFI,Float64}([338.416, 361.988, 377.426, 391.426, 405.236, 417.962, 429.861, 441.171, 452.067, 462.66  …  576.471, 585.14, 593.698, 602.157, 610.511, 618.805, 627.056, 635.21, 643.282, 651.278], [338.416, 361.988, 377.426, 391.426, 405.236, 417.962, 429.861, 441.171, 452.067, 462.66  …  576.471, 585.14, 593.698, 602.157, 610.511, 618.805, 627.056, 635.21, 643.282, 651.278], 4, [1, 1, 1, 2, 2, 2, 2, 2, 2, 2  …  5, 5, 5, 5, 5, 6, 6, 6, 6, 6], Discrete Markov Chain
stochastic matrix of type Array{Float64,2}:
[0.1 0.2 … 0.0 0.0; 0.0 0.1 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.1 0.0])

In [6]:
# Number of iterations

res.num_iter


Out[6]:
4

In [7]:
# Optimal policy

res.sigma'


Out[7]:
1×31 RowVector{Int64,Array{Int64,1}}:
 1  1  1  2  2  2  2  2  2  2  3  3  3  …  4  4  5  5  5  5  5  6  6  6  6  6

In [8]:
# Optimal value function

res.v'


Out[8]:
1×31 RowVector{Float64,Array{Float64,1}}:
 338.416  361.988  377.426  391.426  …  627.056  635.21  643.282  651.278

In [9]:
# Simulate the controlled Markov chain for num_rep times
# and compute the average

init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10^4
ave_path = zeros(ts_length)

for i in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    ave_path = (i / (i + 1)) * ave_path + (1 / (i + 1)) * path
end

ave_path'


Out[9]:
1×51 RowVector{Float64,Array{Float64,1}}:
 0.9999  3.004  4.69893  5.89881  …  13.5211  13.513  13.5308  13.5274

In [10]:
# Stationary distribution of the Markov chain

stationary_dist = stationary_distributions(res.mc)[1]

stationary_dist'


Out[10]:
1×31 RowVector{Float64,Array{Float64,1}}:
 0.0  0.0  1.15115e-7  1.03604e-6  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [11]:
# Plot sigma, v, ave_path, stationary_dist

indices = 0:maxcap
p1 = plot(
    scatter(indices, res.sigma .- 1; label="Irrigation"),
    title="Optimal Irrigation Policy",
    ylabel="Irrigation",
    xlabel="Water Level"
)
p2 = plot(
    plot(indices, res.v; label="Value"),
    title="Optimal Value Function",
    ylabel="Value",
    xlabel="Water Level"
)
p3 = plot(
    plot(ave_path .- 1; label="Water Level"),
    title="Average Optimal State Path",
    ylabel="Water Level",
    xlabel="Year"
)
p4 = plot(
    bar(indices, stationary_dist; label="Probability"),
    title="Stationary Distribution",
    ylabel="Probability",
    xlabel="Water Level"
)

plot(p1, p2, p3, p4)


Out[11]:

State-action pairs formulation


In [12]:
# Arrays of state and action indices

S = 0:n-1
X = 0:m-1

s_indices = Int64[]
a_indices = Int64[]
S_left = reshape(S, n, 1) .- reshape(S, 1, n)

for i in 1:n
    for j in 1:n
        if S_left[i, j] >= 0
            push!(s_indices, i)
            push!(a_indices, j)
        end
    end
end

In [13]:
# Reward vector

S_left_o = S_left
S_left = Array{Int64}(length(a_indices))

for i in 1:length(a_indices)
    S_left[i] = S_left_o[s_indices[i], a_indices[i]]
end

R = F.(X[a_indices]) + G.(S_left)


Out[13]:
496-element Array{Float64,1}:
   0.0   
  10.0   
  14.0   
  13.1951
  24.0   
  24.3754
  15.5185
  27.1951
  34.3754
  33.7151
  17.411 
  29.5185
  37.5705
   ⋮     
 173.71  
 178.917 
 184.003 
 188.958 
 193.772 
 198.426 
 202.893 
 207.128 
 211.051 
 214.5   
 217.036 
 212.728 

In [14]:
# Transition probability array

L = length(S_left)
Q = zeros(L, n)

for i in 1:length(S_left)
    s_left = S_left[i]
    
    for j in 1:supp_size
        Q[i, min(s_left + j, n)] += probs[j]
    end
end

In [15]:
# Solve the dynamic optimization problem by policy iteration

res = solve(DiscreteDP(R, Q, delta, s_indices, a_indices), PFI)


Out[15]:
QuantEcon.DPSolveResult{QuantEcon.PFI,Float64}([338.416, 361.988, 377.426, 391.426, 405.236, 417.962, 429.861, 441.171, 452.067, 462.66  …  576.471, 585.14, 593.698, 602.157, 610.511, 618.805, 627.056, 635.21, 643.282, 651.278], [338.416, 361.988, 377.426, 391.426, 405.236, 417.962, 429.861, 441.171, 452.067, 462.66  …  576.471, 585.14, 593.698, 602.157, 610.511, 618.805, 627.056, 635.21, 643.282, 651.278], 4, [1, 1, 1, 2, 2, 2, 2, 2, 2, 2  …  5, 5, 5, 5, 5, 6, 6, 6, 6, 6], Discrete Markov Chain
stochastic matrix of type Array{Float64,2}:
[0.1 0.2 … 0.0 0.0; 0.0 0.1 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.1 0.0])

In [16]:
# Number of iterations

res.num_iter


Out[16]:
4

In [17]:
# Simulate the controlled Markov chain for num_rep times
# and compute the average

init = 1
nyrs = 50
ts_length = nyrs + 1
num_rep = 10 ^ 4
ave_path = zeros(ts_length)

for i in 1:num_rep
    path = simulate(res.mc, ts_length, init = init)
    ave_path = (i / (i + 1)) * ave_path + (1 / (i + 1)) * path
end

ave_path


Out[17]:
51-element Array{Float64,1}:
  0.9999 
  3.0158 
  4.68993
  5.88621
  6.93361
  7.94041
  8.83802
  9.59694
 10.2172 
 10.718  
 11.1322 
 11.467  
 11.7598 
  ⋮      
 13.4258 
 13.4576 
 13.4336 
 13.4571 
 13.454  
 13.4666 
 13.4685 
 13.4806 
 13.5008 
 13.5062 
 13.5035 
 13.5036 

In [18]:
# Stationary distribution of the Markov chain

stationary_dist = stationary_distributions(res.mc)[1]


Out[18]:
31-element Array{Float64,1}:
 0.0        
 0.0        
 1.15115e-7 
 1.03604e-6 
 8.05805e-6 
 5.98598e-5 
 0.000444344
 0.00329805 
 0.0244792  
 0.0608112  
 0.120882   
 0.139769   
 0.15025    
 ⋮          
 0.000443927
 5.70763e-5 
 6.34181e-6 
 0.0        
 0.0        
 0.0        
 0.0        
 0.0        
 0.0        
 0.0        
 0.0        
 0.0        

In [19]:
# Plot sigma, v, ave_path, stationary_dist

indices = 0:maxcap
p1 = plot(
    scatter(indices, res.sigma .- 1; label="Irrigation"),
    title="Optimal Irrigation Policy",
    ylabel="Irrigation",
    xlabel="Water Level"
)
p2 = plot(
    plot(indices, res.v; label="Value"),
    title="Optimal Value Function",
    ylabel="Value",
    xlabel="Water Level"
)
p3 = plot(
    plot(ave_path .- 1; label="Water Level"),
    title="Average Optimal State Path",
    ylabel="Water Level",
    xlabel="Year"
)
p4 = plot(
    bar(indices, stationary_dist; label="Probability"),
    title="Stationary Distribution",
    ylabel="Probability",
    xlabel="Water Level"
)

plot(p1, p2, p3, p4)


Out[19]: