Daisuke Oyama
Faculty of Economics, University of Tokyo
From Miranda and Fackler, Applied Computational Economics and Finance, 2002, Section 7.6.5
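The cells below solve the water management problem from that section. A reservoir holds an integer water level s between 0 and maxcap; each year the planner chooses an irrigation level x with 0 <= x <= s, collecting the benefit F(x) + U(s - x), after which a random inflow replenishes the reservoir, capped at maxcap. Reconstructed from the code below (the notation here is mine), the Bellman equation is

$$
v(s) = \max_{0 \le x \le s} \left\{ F(x) + U(s - x) + \beta \sum_{e=0}^{4} p_e \, v\bigl(\min(s - x + e, \bar{s})\bigr) \right\},
$$

where $\bar{s} = 30$ is the capacity, $\beta = 0.9$ the discount factor, and $p = (0.1, 0.2, 0.4, 0.2, 0.1)$ the inflow distribution.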
In [1]:
%matplotlib inline
In [2]:
import itertools
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
from quantecon.markov import DiscreteDP
In [3]:
maxcap = 30
n = maxcap + 1 # Number of states
m = n # Number of actions
a1, b1 = 14, 0.8
a2, b2 = 10, 0.4
F = lambda x: a1 * x**b1 # Benefit from irrigation
U = lambda c: a2 * c**b2 # Benefit from recreational consumption c = s - x
probs = [0.1, 0.2, 0.4, 0.2, 0.1]  # Inflow distribution over {0, ..., 4}
supp_size = len(probs)
beta = 0.9  # Discount factor
In [4]:
# Reward array
R = np.empty((n, m))
for s, x in itertools.product(range(n), range(m)):
    # Infeasible actions (x > s) are ruled out with a reward of -inf
    R[s, x] = F(x) + U(s-x) if x <= s else -np.inf
In [5]:
# Transition probability array
Q = np.zeros((n, m, n))
for s, x in itertools.product(range(n), range(m)):
    if x <= s:
        # Inflow j arrives with probability probs[j];
        # the water level is capped at maxcap = n-1
        for j in range(supp_size):
            Q[s, x, min(s-x+j, n-1)] += probs[j]
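A quick sanity check, not in the original notebook: every feasible (s, x) row of Q should be a probability distribution, while infeasible rows (x > s) were left as zeros.

# Feasible pairs sum to 1; infeasible pairs remain all-zero rows
for s, x in itertools.product(range(n), range(m)):
    expected = 1.0 if x <= s else 0.0
    assert np.isclose(Q[s, x].sum(), expected)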
In [6]:
# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta)
In [7]:
# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()
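`solve` uses policy iteration by default, as the comment above notes. To compare algorithms, `DiscreteDP.solve` also accepts a `method` argument (this matches the quantecon API as I understand it; check your installed version, and note that `res_vi` is my own name):

# Hypothetical comparison run: the same problem solved by value iteration
res_vi = ddp.solve(method='value_iteration', epsilon=1e-4)
# res_vi.sigma typically coincides with res.sigma for this problem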
In [8]:
# Number of iterations
res.num_iter
Out[8]:
In [9]:
# Optimal policy
res.sigma
Out[9]:
In [10]:
# Optimal value function
res.v
Out[10]:
In [11]:
# Simulate the controlled Markov chain for num_rep times
# and compute the average
init = 0
nyrs = 50
ts_length = nyrs + 1
num_rep = 10**4
ave_path = np.zeros(ts_length)
for i in range(num_rep):
    path = res.mc.simulate(ts_length, init=init)
    # Incremental update of the running average
    ave_path = (i/(i+1)) * ave_path + (1/(i+1)) * path
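An equivalent, loop-free way to compute the same average, assuming your quantecon version supports the `num_reps` argument of `MarkovChain.simulate` (the result differs from `ave_path` only by sampling noise; `paths` and `ave_path_vec` are my names):

# Simulate all replications at once; each row is one sample path
paths = res.mc.simulate(ts_length, init=init, num_reps=num_rep)
ave_path_vec = paths.mean(axis=0)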
In [12]:
ave_path
Out[12]:
In [13]:
# Stationary distribution of the Markov chain
stationary_dist = res.mc.stationary_distributions[0]
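By definition a stationary distribution satisfies pi P = pi; a quick check against the transition matrix of the controlled chain, exposed as `res.mc.P` in quantecon:

# pi P = pi, up to floating-point error
assert np.allclose(stationary_dist @ res.mc.P, stationary_dist)
assert np.isclose(stationary_dist.sum(), 1.0)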
In [14]:
stationary_dist
Out[14]:
In [15]:
# Plot sigma, v, ave_path, stationary_dist
hspace = 0.3
fig, axes = plt.subplots(2, 2, figsize=(12, 8+hspace))
fig.subplots_adjust(hspace=hspace)
axes[0, 0].plot(res.sigma, '*')
axes[0, 0].set_xlim(-1, 31)
axes[0, 0].set_ylim(-0.5, 5.5)
axes[0, 0].set_xlabel('Water Level')
axes[0, 0].set_ylabel('Irrigation')
axes[0, 0].set_title('Optimal Irrigation Policy')
axes[0, 1].plot(res.v)
axes[0, 1].set_xlim(0, 30)
y_lb, y_ub = 300, 700
axes[0, 1].set_ylim(y_lb, y_ub)
axes[0, 1].set_yticks(np.linspace(y_lb, y_ub, 5, endpoint=True))
axes[0, 1].set_xlabel('Water Level')
axes[0, 1].set_ylabel('Value')
axes[0, 1].set_title('Optimal Value Function')
axes[1, 0].plot(ave_path)
axes[1, 0].set_xlim(0, nyrs)
y_lb, y_ub = 0, 15
axes[1, 0].set_ylim(y_lb, y_ub)
axes[1, 0].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Water Level')
axes[1, 0].set_title('Average Optimal State Path')
axes[1, 1].bar(range(n), stationary_dist, align='center')
axes[1, 1].set_xlim(-1, n)
y_lb, y_ub = 0, 0.15
axes[1, 1].set_ylim(y_lb, y_ub+0.01)
axes[1, 1].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 1].set_xlabel('Water Level')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].set_title('Stationary Distribution')
plt.show()
In [16]:
# Arrays of state and action indices
S = np.arange(n)
X = np.arange(m)
# S_left[s, x] = s - x, the water left after irrigation; feasible iff >= 0
S_left = S.reshape(n, 1) - X.reshape(1, m)
s_indices, a_indices = np.where(S_left >= 0)
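Since only actions with x <= s are feasible, the number of state-action pairs is 1 + 2 + ... + n = n(n+1)/2 = 496 here, compared with the n*m = 961 entries of the dense arrays above. A quick check:

# Number of feasible (s, x) pairs
assert len(s_indices) == n * (n + 1) // 2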
In [17]:
# Reward vector
S_left = S_left[s_indices, a_indices]
R = F(X[a_indices]) + U(S_left)
In [18]:
# Transition probability array
L = len(S_left)
Q = sparse.lil_matrix((L, n))
for i, s_left in enumerate(S_left):
    for j in range(supp_size):
        Q[i, min(s_left+j, n-1)] += probs[j]
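The same sanity check as before, adapted to the sparse formulation: every row now corresponds to a feasible pair, so all row sums should equal 1.

# All L rows of the sparse Q are probability distributions
assert np.allclose(Q.toarray().sum(axis=1), 1.0)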
In [19]:
# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta, s_indices, a_indices)
In [20]:
# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()
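One quick check on the solution, not in the original notebook: the optimal irrigation level can never exceed the available water.

# sigma[s] is the optimal irrigation at water level s, so sigma[s] <= s
assert np.all(res.sigma <= np.arange(n))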
In [21]:
# Number of iterations
res.num_iter
Out[21]:
In [22]:
# Simulate the controlled Markov chain for num_rep times
# and compute the average
init = 0
nyrs = 50
ts_length = nyrs + 1
num_rep = 10**4
ave_path = np.zeros(ts_length)
for i in range(num_rep):
    path = res.mc.simulate(ts_length, init=init)
    ave_path = (i/(i+1)) * ave_path + (1/(i+1)) * path
In [23]:
# Stationary distribution of the Markov chain
stationary_dist = res.mc.stationary_distributions[0]
In [24]:
# Plot sigma, v, ave_path, stationary_dist
hspace = 0.3
fig, axes = plt.subplots(2, 2, figsize=(12, 8+hspace))
fig.subplots_adjust(hspace=hspace)
axes[0, 0].plot(res.sigma, '*')
axes[0, 0].set_xlim(-1, 31)
axes[0, 0].set_ylim(-0.5, 5.5)
axes[0, 0].set_xlabel('Water Level')
axes[0, 0].set_ylabel('Irrigation')
axes[0, 0].set_title('Optimal Irrigation Policy')
axes[0, 1].plot(res.v)
axes[0, 1].set_xlim(0, 30)
y_lb, y_ub = 300, 700
axes[0, 1].set_ylim(y_lb, y_ub)
axes[0, 1].set_yticks(np.linspace(y_lb, y_ub, 5, endpoint=True))
axes[0, 1].set_xlabel('Water Level')
axes[0, 1].set_ylabel('Value')
axes[0, 1].set_title('Optimal Value Function')
axes[1, 0].plot(ave_path)
axes[1, 0].set_xlim(0, nyrs)
y_lb, y_ub = 0, 15
axes[1, 0].set_ylim(y_lb, y_ub)
axes[1, 0].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Water Level')
axes[1, 0].set_title('Average Optimal State Path')
axes[1, 1].bar(range(n), stationary_dist, align='center')
axes[1, 1].set_xlim(-1, n)
y_lb, y_ub = 0, 0.15
axes[1, 1].set_ylim(y_lb, y_ub+0.01)
axes[1, 1].set_yticks(np.linspace(y_lb, y_ub, 4, endpoint=True))
axes[1, 1].set_xlabel('Water Level')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].set_title('Stationary Distribution')
plt.show()