DiscreteDP Example: Asset Replacement

Daisuke Oyama

Faculty of Economics, University of Tokyo

From Miranda and Fackler, Applied Computational Economics and Finance, 2002, Section 7.6.2
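
In outline (a sketch reconstructed from the arrays built below): at the beginning of each year, a manufacturer decides whether to keep an aging asset or replace it with a new one. An asset of age $a \in \{1, \dots, 5\}$ yields a profit of $p(a) = 50 - 2.5a - 2.5a^2$ if kept, after which its age increases by one; replacement costs 75, yields the new asset's profit $p(0) = 50$, and the next year starts with an age-1 asset. Keeping an asset of the maximum age is infeasible. With discount factor $\beta = 0.9$, the Bellman equation is

$$
V(a) = \max\bigl\{\, p(a) + \beta V(a+1),\; p(0) - 75 + \beta V(1) \bigr\},
\qquad a = 1, \dots, 4,
$$

with $V(5) = p(0) - 75 + \beta V(1)$.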


In [1]:
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from quantecon.markov import DiscreteDP

In [3]:
maxage = 5    # Maximum asset age
repcost = 75  # Replacement cost
beta = 0.9    # Discount factor
m = 2         # Number of actions; 0: keep, 1: replace

In [4]:
S = np.arange(1, maxage+1)  # State space
n = len(S)                  # Number of states

In [5]:
# Reward array: R[s, a] for state (asset age) s and action a (0: keep, 1: replace)
R = np.empty((n, m))
R[:, 0] = 50 - 2.5 * S - 2.5 * S**2  # Profit from operating an age-S asset
R[:, 1] = 50 - repcost               # New asset's profit net of the replacement cost

# Keeping the asset at its maximum age is infeasible
R[-1, 0] = -np.inf

In [6]:
R


Out[6]:
array([[ 45., -25.],
       [ 35., -25.],
       [ 20., -25.],
       [  0., -25.],
       [-inf, -25.]])

In [7]:
# (Degenerate) transition probability array
Q = np.zeros((n, m, n))
for i in range(n):
    Q[i, 0, np.minimum(i+1, n-1)] = 1  # Keep: the asset ages by one year (capped at maxage)
    Q[i, 1, 0] = 1                     # Replace: next year starts with an age-1 asset
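
As a quick sanity check (an illustrative assertion, not part of the original example), every (state, action) pair should define a probability distribution over next states:

In [ ]:
# Each row Q[s, a, :] must sum to one
assert np.allclose(Q.sum(axis=-1), 1)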

In [8]:
Q


Out[8]:
array([[[ 0.,  1.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  1.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  1.,  0.],
        [ 1.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.,  1.],
        [ 1.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.,  1.],
        [ 1.,  0.,  0.,  0.,  0.]]])

In [9]:
# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta)

In [10]:
# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()
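
`solve` uses policy iteration by default. As a hedged sketch (assuming the `method` and `epsilon` keywords of `DiscreteDP.solve` in this version of quantecon), the problem can also be solved by value iteration, and the resulting policy should coincide:

In [ ]:
# Solve again by value iteration and check that the optimal policy agrees
res_vi = ddp.solve(method='value_iteration', epsilon=1e-4)
np.array_equal(res_vi.sigma, res.sigma)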

In [11]:
# Number of iterations
res.num_iter


Out[11]:
1

In [12]:
# Optimal value function
res.v


Out[12]:
array([ 216.56004653,  190.62227392,  172.91363769,  169.90404187,
        169.90404187])

In [13]:
# Optimal policy
res.sigma


Out[13]:
array([0, 0, 0, 1, 1])
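
The optimal policy keeps the asset through age 3 and replaces it at age 4 (keeping at age 5 is infeasible). As a consistency check, a sketch assuming the `bellman_operator` and `evaluate_policy` methods of `DiscreteDP`: the optimal value function should be a fixed point of the Bellman operator and should equal the value of following the optimal policy forever.

In [ ]:
# v* is a fixed point of the Bellman operator ...
print(np.allclose(ddp.bellman_operator(res.v), res.v))
# ... and equals the discounted value of the stationary policy sigma*
print(np.allclose(ddp.evaluate_policy(res.sigma), res.v))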

In [14]:
# Transition probability matrix
res.mc.P


Out[14]:
array([[ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.]])
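
Under the optimal policy the asset is replaced at age 4, so the controlled chain cycles deterministically through ages 1 → 2 → 3 → 4, visiting age 5 only if it starts there. Its unique stationary distribution should therefore put probability 1/4 on each of ages 1 through 4. A sketch, assuming the `stationary_distributions` attribute of the induced `MarkovChain`:

In [ ]:
# Stationary distribution(s) of the chain induced by the optimal policy
res.mc.stationary_distributions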

In [15]:
# Simulate the controlled Markov chain
res.mc.state_values = S  # Set the state values
initial_state_value = 1
nyrs = 12
spath = res.mc.simulate(nyrs+1, init=initial_state_value)
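
Because the controlled chain is deterministic, the simulated path simply traces the replacement cycle from the initial age of 1, so no random seed is needed for reproducibility:

In [ ]:
# Ages visited over the simulation horizon
print(spath)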

In [16]:
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].plot(np.arange(n)+1, res.v)
axes[0].set_xlim(1, 5)
axes[0].set_ylim(160, 220)
axes[0].set_xticks(np.linspace(1, 5, 5, endpoint=True))
axes[0].set_xlabel('Age of Machine')
axes[0].set_ylabel('Value')
axes[0].set_title('Optimal Value Function')

axes[1].plot(spath)
axes[1].set_xlim(0, nyrs)
axes[1].set_ylim(1, 4)
axes[1].set_yticks(np.linspace(1, 4, 4, endpoint=True))
axes[1].set_xlabel('Year')
axes[1].set_ylabel('Age of Machine')
axes[1].set_title('Optimal State Path')

plt.show()
