Daisuke Oyama
Faculty of Economics, University of Tokyo
We use DiscreteDP to solve the career-job model considered in http://quant-econ.net/py/career.html.
In [1]:
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D
from matplotlib import cm
import quantecon as qe
from quantecon.markov import DiscreteDP
In [3]:
# matplotlib settings
plt.rcParams['axes.xmargin'] = 0
plt.rcParams['axes.ymargin'] = 0
plt.rcParams['patch.force_edgecolor'] = True
from cycler import cycler
plt.rcParams['axes.prop_cycle'] = cycler(color='bgrcmyk')
Construct the state space:
In [4]:
# Number of possible realizations for both theta and epsilon
N = 50
# Upper bound for both theta and epsilon
B = 5
theta = np.linspace(0, B, N) # set of theta values
epsilon = np.linspace(0, B, N) # set of epsilon values
# Set of indices for theta-epsilon pairs
s_indices2d = qe.cartesian([np.arange(N), np.arange(N)])
States are ordered as follows:
In [5]:
print(s_indices2d)
In [6]:
# Number of states
n = N * N
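With this ordering, the flat index of the state pair (i, j) is i*N + j. A quick sanity check (a sketch, using the ordering produced by qe.cartesian above):
# Sketch: the flat index of the pair (i, j) equals i*N + j
i, j = 3, 7
assert np.array_equal(s_indices2d[i*N + j], [i, j])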
Distributions of theta and epsilon:
In [7]:
F_a, F_b = 1, 1
F_probs = qe.distributions.BetaBinomial(N-1, F_a, F_b).pdf()
F_mean = np.sum(theta * F_probs)
G_a, G_b = 1, 1
G_probs = qe.distributions.BetaBinomial(N-1, G_a, G_b).pdf()
G_mean = np.sum(epsilon * G_probs)
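With F_a = F_b = G_a = G_b = 1 the Beta-binomial weights are uniform over the grid, so both means should equal B/2 = 2.5 (up to floating-point error). A quick check (a sketch):
# Sketch: with a = b = 1 the Beta-binomial is uniform over the grid points
assert np.isclose(F_mean, B/2) and np.isclose(G_mean, B/2)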
Construct the reward array R and the transition probability array Q:
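For reference, the expected current rewards coded below are $\theta + \epsilon$ for "stay put", $\theta + \mathbb{E}_G[\epsilon']$ for "new job" (a new $\epsilon'$ is drawn from $G$), and $\mathbb{E}_F[\theta'] + \mathbb{E}_G[\epsilon']$ for "new life" (both components are redrawn); the corresponding transition distributions are degenerate at the current state, $G$ over $\epsilon'$ with $\theta$ unchanged, and $F \otimes G$, respectively.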
In [8]:
# Number of actions; 0: stay put, 1: new job, 2: new life
m = 3
# Reward and transition probability arrays
R = np.empty((n, m))
Q = np.zeros((n, m, n))
# Stay put
R[:, 0] = theta[s_indices2d[:, 0]] + epsilon[s_indices2d[:, 1]]
Q[np.arange(n), 0, np.arange(n)] = 1
# New job
R[:, 1] = theta[s_indices2d[:, 0]] + G_mean
for i in range(N):
    Q[i*N:(i+1)*N, 1, i*N:(i+1)*N] = G_probs
# New life
R[:, 2] = F_mean + G_mean
Q[:, 2] = F_probs.reshape(N, 1).dot(G_probs.reshape(1, N)).ravel()
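A quick sanity check (a sketch): each (state, action) slice of Q should be a probability distribution over next states.
# Sketch: every transition probability row of Q must sum to one
assert np.allclose(Q.sum(axis=-1), 1.0)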
Discount factor:
In [9]:
beta = 0.95
Create a DiscreteDP instance:
In [10]:
ddp = DiscreteDP(R, Q, beta)
Solve the Markov decision problem:
In [11]:
res = ddp.solve()
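res above is computed with the default method of DiscreteDP.solve (policy iteration, at the time of writing). As a quick cross-check, here is a sketch comparing it with explicit value iteration:
# Sketch: value iteration should produce (numerically) the same value function
res_vi = ddp.solve(method='vi', epsilon=1e-3, max_iter=500)
print(np.abs(res.v - res_vi.v).max())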
Number of iterations:
In [12]:
res.num_iter
Out[12]:
The returned value function res.v and optimal policy res.sigma are 1-dimensional arrays. To convert them to 2-dimensional arrays:
In [13]:
v_2d = res.v.reshape(N, N)
sigma_2d = res.sigma.reshape(N, N)
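The reshape is row-major, so v_2d[i, j] is the value at the state (theta[i], epsilon[j]), consistent with the ordering of s_indices2d. A quick check (a sketch):
# Sketch: row-major reshape matches the flat ordering i*N + j
i, j = 10, 20
assert v_2d[i, j] == res.v[i*N + j] and sigma_2d[i, j] == res.sigma[i*N + j]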
Plot the optimal value function:
In [14]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
tg, eg = np.meshgrid(theta, epsilon)
ax.plot_surface(tg,
eg,
v_2d.T,
rstride=2, cstride=2,
cmap=cm.jet,
alpha=0.5,
linewidth=0.25)
ax.set_zlim(150, 200)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.set_zlabel('value', fontsize=14)
ax.view_init(ax.elev, 225)
ax.set_title(r'Optimal value function: $\beta = {0}$'
.format(ddp.beta), y=1.05)
plt.show()
Plot the optimal policy function:
In [15]:
fig, ax = plt.subplots(figsize=(6, 6))
tg, eg = np.meshgrid(theta, epsilon)
lvls=(-0.5, 0.5, 1.5, 2.5)
ax.contourf(tg, eg, sigma_2d.T, levels=lvls, cmap=cm.winter, alpha=0.5)
ax.contour(tg, eg, sigma_2d.T, colors='k', levels=lvls, linewidths=2)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.text(1.8, 2.5, 'new life', fontsize=14)
ax.text(4.5, 2.5, 'new job', fontsize=14, rotation='vertical')
ax.text(4.0, 4.5, 'stay put', fontsize=14)
ax.set_title(r'Optimal policy function: $\beta = {0}$'.format(ddp.beta))
plt.show()
Simulate the controlled Markov chain:
In [16]:
ts_length = 20
seed = 3210 # for replication
# seed = None # to randomly initialize the random number generator
X = res.mc.simulate(ts_length=ts_length, num_reps=2, random_state=seed)
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
for x, ax in zip(X, axes):
    theta_path, epsilon_path = theta[s_indices2d[x, 0]], epsilon[s_indices2d[x, 1]]
    ax.plot(epsilon_path, label='epsilon')
    ax.plot(theta_path, label='theta')
    ax.legend(loc='lower right')
    ax.set_ylim(0, B)
axes[0].set_title(r'Sample paths: $\beta = {0}$'.format(ddp.beta))
plt.show()
Generate sample paths and compute the first passage times for the stay-put region:
In [17]:
M = 25000 # Number of samples
ts_length = 100
seed = 42
X = res.mc.simulate(ts_length=ts_length, init=0, num_reps=M, random_state=seed)
In [18]:
T_stars = (res.sigma[X] != 0).sum(axis=1)
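This counts the periods spent outside the stay-put region, which equals the first passage time because the region is absorbing (the "stay put" action leaves the state unchanged). A quick check along the simulated paths (a sketch):
# Sketch: the indicator of being in the stay-put region should be
# non-decreasing along every sample path
in_region = (res.sigma[X] == 0)
print(np.all(np.diff(in_region.astype(int), axis=1) >= 0))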
Check that the state enters the stay-put region before ts_length:
In [19]:
all(T_stars < ts_length)
Out[19]:
In [20]:
fig, ax = plt.subplots(figsize=(8, 5))
hist = np.histogram(T_stars, bins=T_stars.max(), range=(0, T_stars.max()))
ax.bar(np.arange(T_stars.max()), hist[0], align='center')
ax.set_xlim(0, ts_length)
ax.set_xlabel('Time')
ax.set_ylabel('Frequency')
ax.set_title(r'First passage time: $\beta = {0}$'.format(ddp.beta))
plt.show()
In [21]:
np.mean(T_stars)
Out[21]:
In [22]:
np.median(T_stars)
Out[22]:
Repeat the above exercises with $\beta = 0.99$.
In [23]:
ddp.beta = 0.99
In [24]:
res99 = ddp.solve()
In [25]:
res99.num_iter
Out[25]:
In [26]:
v99_2d = res99.v.reshape(N, N)
sigma99_2d = res99.sigma.reshape(N, N)
In [27]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
tg, eg = np.meshgrid(theta, epsilon)
ax.plot_surface(tg,
eg,
v99_2d.T,
rstride=2, cstride=2,
cmap=cm.jet,
alpha=0.5,
linewidth=0.25)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.set_zlabel('value', fontsize=14)
ax.view_init(ax.elev, 225)
ax.set_title(r'Optimal value function: $\beta = {0}$'
.format(ddp.beta), y=1.05)
plt.show()
In [28]:
fig, ax = plt.subplots(figsize=(6, 6))
tg, eg = np.meshgrid(theta, epsilon)
lvls=(-0.5, 0.5, 1.5, 2.5)
ax.contourf(tg, eg, sigma99_2d.T, levels=lvls, cmap=cm.winter, alpha=0.5)
ax.contour(tg, eg, sigma99_2d.T, colors='k', levels=lvls, linewidths=2)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.text(1.8, 2.5, 'new life', fontsize=14)
ax.text(4.6, 2.5, 'new job', fontsize=14, rotation='vertical')
ax.text(4.5, 4.7, 'stay put', fontsize=14)
ax.set_title(r'Optimal policy function: $\beta = {0}$'.format(ddp.beta))
plt.show()
In [29]:
ts_length = 20
seed = 3210 # for replication
# seed = None # to randomly initialize the random number generator
X = res99.mc.simulate(ts_length=ts_length, num_reps=2, random_state=seed)
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
for x, ax in zip(X, axes):
    theta_path, epsilon_path = theta[s_indices2d[x, 0]], epsilon[s_indices2d[x, 1]]
    ax.plot(epsilon_path, label='epsilon')
    ax.plot(theta_path, label='theta')
    ax.legend(loc='lower right')
    ax.set_ylim(0, B)
axes[0].set_title(r'Sample paths: $\beta = {0}$'.format(ddp.beta))
plt.show()
In [30]:
M = 25000 # Number of samples
ts_length = 120
seed = 42 # for replication
# seed = None # to randomly initialize the random number generator
x = res99.mc.simulate(ts_length=ts_length, init=0, num_reps=M, random_state=seed)
T_stars = (res99.sigma[x] != 0).sum(axis=1)
In [31]:
all(T_stars < ts_length)
Out[31]:
In [32]:
fig, ax = plt.subplots(figsize=(8, 5))
hist = np.histogram(T_stars, bins=T_stars.max(), range=(0, T_stars.max()))
ax.bar(np.arange(T_stars.max()), hist[0], align='center')
ax.set_xlim(0, ts_length)
ax.set_xlabel('Time')
ax.set_ylabel('Frequency')
ax.set_title(r'First passage time: $\beta = {0}$'.format(ddp.beta))
plt.show()
In [33]:
np.mean(T_stars)
Out[33]:
In [34]:
np.median(T_stars)
Out[34]:
In [35]:
class CareerWorkerProblemDiscreteDP():
    """
    Class to solve the career-job choice model.

    Parameters
    ----------
    See `CareerWorkerProblem`.

    """
    def __init__(self, B=5.0, beta=0.95, N=50, F_a=1, F_b=1, G_a=1, G_b=1):
        self.beta, self.N, self.B = beta, N, B
        self.theta = np.linspace(0, B, N)    # set of theta values
        self.epsilon = np.linspace(0, B, N)  # set of epsilon values
        self.F_probs = qe.distributions.BetaBinomial(N-1, F_a, F_b).pdf()
        self.G_probs = qe.distributions.BetaBinomial(N-1, G_a, G_b).pdf()
        self.F_mean = np.sum(self.theta * self.F_probs)
        self.G_mean = np.sum(self.epsilon * self.G_probs)
        self.s_indices2d = qe.cartesian((np.arange(N), np.arange(N)))
        self.s_values2d = qe.cartesian((self.theta, self.epsilon))

        n = N * N  # Number of states
        m = 3      # Number of actions; 0: stay put, 1: new job, 2: new life

        # Reward and transition probability arrays
        R = np.empty((n, m))
        Q = np.zeros((n, m, n))

        # Stay put
        R[:, 0] = self.s_values2d.sum(axis=-1)
        Q[np.arange(n), 0, np.arange(n)] = 1

        # New job
        R[:, 1] = self.s_values2d[:, 0] + self.G_mean
        for i in range(N):
            Q[i*N:(i+1)*N, 1, i*N:(i+1)*N] = self.G_probs

        # New life
        R[:, 2] = self.F_mean + self.G_mean
        Q[:, 2] = self.F_probs.reshape(N, 1).dot(self.G_probs.reshape(1, N)).ravel()

        self.ddp = DiscreteDP(R, Q, self.beta)
        self._mc = None
        self.num_iter = None

    @property
    def mc(self):
        if self._mc is None:
            self.solve()
        return self._mc

    def solve(self, *args, **kwargs):
        """
        Solve the model.
        """
        res = self.ddp.solve(*args, **kwargs)
        v = res.v.reshape(self.N, self.N)
        sigma = res.sigma.reshape(self.N, self.N)
        self._mc = res.mc
        self.num_iter = res.num_iter
        return v, sigma

    def simulate(self, ts_length, init=None, num_reps=None, random_state=None,
                 ret='state_value'):
        """
        Simulate the controlled Markov chain.
        """
        if init is not None:
            init = init[0]*self.N + init[1]
        X = self.mc.simulate(ts_length, init, num_reps, random_state)
        if ret == 'state_index':
            paths_index = self.s_indices2d[X]
            return paths_index
        elif ret == 'state_value':
            paths_value = self.s_values2d[X]
            return paths_value
        else:
            raise ValueError()
Let G_a = G_b = 100:
In [36]:
G_a, G_b = 100, 100
wp = CareerWorkerProblemDiscreteDP(G_a=G_a, G_b=G_b)
In [37]:
v, sigma = wp.solve()
In [38]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
tg, eg = np.meshgrid(wp.theta, wp.epsilon)
ax.plot_surface(tg,
eg,
v.T,
rstride=2, cstride=2,
cmap=cm.jet,
alpha=0.5,
linewidth=0.25)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.set_zlabel('value', fontsize=14)
ax.view_init(ax.elev, 225)
ax.set_title(r'Optimal value function: '
r'$G_a = {G_a}$, $G_b = {G_b}$, $\beta = {beta}$'
.format(G_a=G_a, G_b=G_b, beta=wp.beta), y=1.05)
plt.show()
In [39]:
fig, ax = plt.subplots(figsize=(6, 6))
tg, eg = np.meshgrid(wp.theta, wp.epsilon)
lvls=(-0.5, 0.5, 1.5, 2.5)
ax.contourf(tg, eg, sigma.T, levels=lvls, cmap=cm.winter, alpha=0.5)
ax.contour(tg, eg, sigma.T, colors='k', levels=lvls, linewidths=2)
ax.set_xlabel('theta', fontsize=14)
ax.set_ylabel('epsilon', fontsize=14)
ax.text(1.8, 2.5, 'new life', fontsize=14)
ax.text(4.5, 2.5, 'new job', fontsize=14, rotation='vertical')
ax.text(4.0, 4.5, 'stay put', fontsize=14)
ax.set_title(r'Optimal policy function: '
r'$G_a = {G_a}$, $G_b = {G_b}$, $\beta = {beta}$'
.format(G_a=G_a, G_b=G_b, beta=wp.beta))
plt.show()
In [40]:
ts_length = 20
seed = 3210 # for replication
# seed = None # to randomly initialize the random number generator
paths = wp.simulate(ts_length=ts_length, num_reps=2, random_state=seed)
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
for path, ax in zip(paths, axes):
    theta_path, epsilon_path = path[:, 0], path[:, 1]
    ax.plot(epsilon_path, label='epsilon')
    ax.plot(theta_path, label='theta')
    ax.legend(loc='lower right')
    ax.set_ylim(0, wp.B)
axes[0].set_title(r'Sample paths: '
r'$G_a = {G_a}$, $G_b = {G_b}$, $\beta = {beta}$'
.format(G_a=G_a, G_b=G_b, beta=wp.beta))
plt.show()
In [41]:
M = 25000 # Number of samples
ts_length = 100
seed = 42 # for replication
# seed = None # to randomly initialize the random number generator
X = wp.simulate(ts_length=ts_length, init=(0, 0), num_reps=M, random_state=seed,
ret='state_index')
T_stars = (sigma[X[..., 0], X[..., 1]] != 0).sum(axis=1)
In [42]:
all(T_stars < ts_length)
Out[42]:
In [43]:
fig, ax = plt.subplots(figsize=(8, 5))
hist = np.histogram(T_stars, bins=T_stars.max(), range=(0, T_stars.max()))
ax.bar(np.arange(T_stars.max()), hist[0], align='center')
ax.set_xlim(0, ts_length)
ax.set_xlabel('Time')
ax.set_ylabel('Frequency')
ax.set_title(r'First passage time: '
r'$G_a = {G_a}$, $G_b = {G_b}$, $\beta = {beta}$'
.format(G_a=G_a, G_b=G_b, beta=wp.beta))
plt.show()
In [44]:
np.mean(T_stars)
Out[44]:
In [45]:
np.median(T_stars)
Out[45]:
Let us compare the above CareerWorkerProblemDiscreteDP with the original CareerWorkerProblem:
We need to download the script file career.py from the career directory in the QuantEcon.lectures.code repo.
In [46]:
# Download the module file and then import it
module = "career"
folder = "career"
object_name = "CareerWorkerProblem"
file = module + ".py"
repo = "https://github.com/QuantEcon/QuantEcon.lectures.code"
qe.util.fetch_nb_dependencies(files=[file], repo=repo, folder=folder)
exec("from {0} import {1}".format(module, object_name))
We are now ready to use CareerWorkerProblem.
In [47]:
wp_orig = CareerWorkerProblem()
wp_ddp = CareerWorkerProblemDiscreteDP()
Solve the model by value iteration with max_iter = 50, which is the default value in qe.compute_fixed_point:
In [48]:
v_init = np.ones((wp_orig.N, wp_orig.N))*100
v_orig = qe.compute_fixed_point(wp_orig.bellman_operator, v_init, print_skip=25)
sigma_orig = wp_orig.get_greedy(v_orig)
In [49]:
v_init_1d = np.ones(wp_orig.N*wp_orig.N)*100
error_tol = 1e-3
# DiscreteDP's value iteration stops when the distance between successive
# iterates falls below epsilon*(1-beta)/(2*beta), so this choice of epsilon
# is meant to make its stopping threshold comparable to error_tol above.
epsilon = error_tol * (2*wp_orig.beta) / (1-wp_orig.beta)
v_ddp, sigma_ddp = \
wp_ddp.solve(method='vi', v_init=v_init_1d, epsilon=epsilon, max_iter=50)
In [50]:
np.abs(v_orig - v_ddp).max()
Out[50]:
In [51]:
np.array_equal(sigma_orig-1, sigma_ddp)  # CareerWorkerProblem codes actions as 1, 2, 3; DiscreteDP uses 0, 1, 2
Out[51]:
max_iter = 50 is binding:
In [52]:
wp_ddp.num_iter
Out[52]:
Solve by policy iteration:
In [53]:
v_ddp_pi, sigma_ddp_pi = \
wp_ddp.solve(method='pi', v_init=v_init_1d)
In [54]:
np.abs(v_orig - v_ddp_pi).max()
Out[54]:
In [55]:
np.array_equal(sigma_orig-1, sigma_ddp_pi)
Out[55]:
In [56]:
(sigma_orig-1 != sigma_ddp_pi).sum()
Out[56]:
In [57]:
np.where(sigma_orig-1 != sigma_ddp_pi)
Out[57]:
Increase max_iter:
In [58]:
v_ddp2, sigma_ddp2 = \
wp_ddp.solve(method='vi', v_init=v_init_1d, epsilon=epsilon, max_iter=200)
In [59]:
wp_ddp.num_iter
Out[59]:
In [60]:
v_init = np.ones((wp_orig.N, wp_orig.N))*100
v_orig2 = qe.compute_fixed_point(wp_orig.bellman_operator, v_init, print_skip=25,
max_iter=200)
sigma_orig2 = wp_orig.get_greedy(v_orig2)
In [61]:
np.array_equal(sigma_orig2-1, sigma_ddp_pi)
Out[61]:
Consider the case of $\beta = 0.99$.
In [62]:
beta = 0.99
wp_orig99 = CareerWorkerProblem(beta=beta)
wp_ddp99 = CareerWorkerProblemDiscreteDP(beta=beta)
In [63]:
v_init = np.ones((wp_orig99.N, wp_orig99.N))*100
v_orig99 = qe.compute_fixed_point(wp_orig99.bellman_operator, v_init, max_iter=2000, print_skip=100)
sigma_orig99 = wp_orig99.get_greedy(v_orig99)
In [64]:
v_init_1d = np.ones(wp_orig99.N*wp_orig99.N)*100
error_tol = 1e-3
epsilon = error_tol * (2*wp_orig99.beta) / (1-wp_orig99.beta)
v_ddp99, sigma_ddp99 = \
wp_ddp99.solve(method='vi', v_init=v_init_1d, epsilon=epsilon, max_iter=2000)
In [65]:
np.array_equal(sigma_orig99-1, sigma_ddp99)
Out[65]:
In [66]:
v_ddp99_pi, sigma_ddp99_pi = \
wp_ddp99.solve(method='pi', v_init=v_init_1d)
In [67]:
np.array_equal(sigma_orig99-1, sigma_ddp99_pi)
Out[67]:
In [68]:
(sigma_orig99-1 != sigma_ddp99_pi).sum()
Out[68]:
Compute the median first passage time as in the lecture:
In [69]:
F = qe.DiscreteRV(wp_orig99.F_probs)
G = qe.DiscreteRV(wp_orig99.G_probs)
def gen_first_passage_time(sigma):
    t = 0
    i = j = 0
    while 1:
        if sigma[i, j] == 1:    # Stay put
            return t
        elif sigma[i, j] == 2:  # New job
            j = int(G.draw())
        else:                   # New life
            i, j = int(F.draw()), int(G.draw())
        t += 1

M = 25000  # Number of samples
samples = np.empty(M)
for i in range(M):
    samples[i] = gen_first_passage_time(sigma=sigma_orig99)
print(np.median(samples))
And by the simulate method of our CareerWorkerProblemDiscreteDP class:
In [70]:
M = 25000 # Number of samples
ts_length = 120
seed = 42 # for replication
# seed = None # to randomly initialize the random number generator
X = wp_ddp99.simulate(ts_length=ts_length, init=(0, 0), num_reps=M, random_state=seed,
ret='state_index')
T_stars = (sigma_ddp99[X[..., 0], X[..., 1]] != 0).sum(axis=1)
np.median(T_stars)
Out[70]: