In [1]:
# pulp
# Very slow because sigma is computed from the constraint duals via a Python list
%matplotlib inline
import matplotlib.pyplot as plt
import pulp
import quantecon as qe
import numpy as np
import time
from gurobipy import *
class SimpleOG(object):

    def __init__(self, B=10, M=5, alpha=0.5, beta=0.9):
        self.B, self.M, self.alpha, self.beta = B, M, alpha, beta
        self.n = B + M + 1
        self.m = M + 1
        self.R = np.empty((self.n, self.m))
        self.Q = np.zeros((self.n, self.m, self.n))
        self.populate_Q()
        self.populate_R()

    def u(self, c):
        return c**self.alpha

    def populate_R(self):
        for s in range(self.n):
            for a in range(self.m):
                self.R[s, a] = self.u(s - a) if a <= s else -np.inf

    def populate_Q(self):
        for a in range(self.m):
            self.Q[:, a, a:(a + self.B + 1)] = 1.0 / (self.B + 1)
def LP_approach(setting):
    num_state = range(1, setting.num_states + 1)
    num_action = range(1, setting.num_actions + 1)
    LP = pulp.LpProblem('LP_Approach', pulp.LpMinimize)
    value_functions = pulp.LpVariable.dicts('value', num_state, 0, 50000, 'Continuous')
    # set constraints
    dimension = setting.num_states * setting.num_actions
    c_s = [None] * dimension
    count = 0
    for i in num_state:
        for j in num_action:
            c = (value_functions[i]
                 - setting.beta * pulp.lpSum(setting.Q[i-1, j-1, k-1] * value_functions[k]
                                             for k in num_state)
                 >= setting.R[i-1, j-1])
            LP += c
            c_s[count] = c
            count += 1
    # set objective
    LP += pulp.lpSum(value_functions[i] for i in num_state)
    # solve
    LP.solve(pulp.GUROBI(msg=False))
    # sigma: read the constraint duals; if several actions at a state have a
    # positive dual, the one with the larger index is kept
    f = lambda x: x.pi
    pi_list = [f(i) for i in c_s]
    pi_statedivision = zip(*[iter(pi_list)] * setting.num_actions)
    pi_state_list = [list(i) for i in pi_statedivision]
    sigma = [None] * setting.num_states
    for i, actions in enumerate(pi_state_list):
        for j, value in enumerate(actions):
            if value > 0:
                sigma[i] = j
    # values
    v = np.empty(setting.num_states)
    for i in num_state:
        v[i - 1] = value_functions[i].value()
    res = qe.markov.ddp.DPSolveResult(v=v, sigma=sigma,
                                      method='Exact Linear Programming',
                                      )
    return res
def elapse_LP(setting):
    start = time.time()
    LP_approach(setting)
    return time.time() - start

def elapse_value(setting):
    start = time.time()
    setting.solve(method='value_iteration')
    elapsed_time = time.time() - start
    return elapsed_time

def plot_graph(n, discount):
    a = [None] * (n - 9)
    b = [None] * (n - 9)
    for i in range(10, n + 1):
        h = SimpleOG(M=i, beta=discount)
        d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
        a[i - 10] = elapse_LP(d)
        b[i - 10] = elapse_value(d)
    plt.plot(a)
    plt.plot(b)
def plot_beta(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B=shock, M=state, beta=value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_LP(ddp)
    # drop outliers longer than 10 seconds before plotting
    n = [j for j in n if j <= 10]
    plt.plot(n)
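The LP built in `LP_approach` above is the standard exact-LP formulation of the Bellman equation: minimize the sum of the values subject to one inequality per (state, action) pair,

$$
\min_{v}\ \sum_{s} v(s)
\quad\text{s.t.}\quad
v(s) - \beta \sum_{s'} Q(s, a, s')\, v(s') \;\ge\; R(s, a)
\quad \text{for all } (s, a).
$$

The code then reads the policy off the constraint duals (`.pi`): at each state, an action whose constraint has a positive dual is taken to be the one to choose.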
In [41]:
# Actually solve it
# The result matches the reference solution below
h = SimpleOG()
d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
res = LP_approach(d)
res
Out[41]:
In [42]:
# Reference solution
d.solve()
Out[42]:
In [45]:
# Vertical axis is in seconds
# Blue is the LP approach, green is value iteration
# The LP approach is really slow...
plot_graph(50, 0.98)
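A likely bottleneck is that one PuLP constraint object is built per (state, action) pair inside a Python loop, and the duals are then collected one by one. For comparison, here is a minimal sketch (not part of the notebook; it assumes SciPy is available and uses a hypothetical helper name `lp_approach_scipy`) that assembles the same LP as dense numpy arrays and hands it to `scipy.optimize.linprog`:

import numpy as np
from scipy.optimize import linprog

def lp_approach_scipy(ddp):
    """Solve the same exact LP with scipy.optimize.linprog instead of PuLP."""
    n, m = ddp.num_states, ddp.num_actions
    R, Q, beta = ddp.R, ddp.Q, ddp.beta
    rows, rhs = [], []
    # one inequality per feasible (s, a):  v(s) - beta * Q[s, a, :] . v >= R[s, a],
    # rewritten as (beta * Q[s, a, :] - e_s) . v <= -R[s, a] for linprog's A_ub form
    for s in range(n):
        for a in range(m):
            if np.isfinite(R[s, a]):          # skip infeasible actions (R = -inf)
                row = beta * Q[s, a, :]
                row[s] -= 1.0
                rows.append(row)
                rhs.append(-R[s, a])
    c = np.ones(n)                            # objective: minimize sum of values
    sol = linprog(c, A_ub=np.array(rows), b_ub=np.array(rhs),
                  bounds=[(0, None)] * n)
    return sol.x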
In [16]:
# Can constraints be stored in a list?
# Yes, they can
import pulp
problem = pulp.LpProblem('sample', pulp.LpMinimize)
a = pulp.LpVariable('a', 0, 1)
b = pulp.LpVariable('b', 0, 1)
problem += a + b
c_1 = a >= 0
c_2 = b >= 0.1
c_3 = a + b == 0.5
c_s = [c_1, c_2, c_3]
for i in c_s:
    problem += i
problem.solve()
print problem
print "Result"
print "a", a.value()
print "b", b.value()
print [lagrange.pi for lagrange in c_s]
In [17]:
c_1
Out[17]:
In [1]:
# Can constraints be stored in a numpy array?
# Apparently not
import pulp
import numpy as np
problem = pulp.LpProblem('sample', pulp.LpMinimize)
a = pulp.LpVariable('a', 0, 1)
b = pulp.LpVariable('b', 0, 1)
problem += a + b
c_1 = a >= 0
c_2 = b >= 0.1
c_3 = a + b == 0.5
c_s = np.empty(3)
c_s[0] = c_1
c_s[1] = c_2
c_s[2] = c_3
for i in c_s:
    problem += i
problem.solve()
print problem
print "Result"
print "a", a.value()
print "b", b.value()
print [lagrange.pi for lagrange in c_s]
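The failure above appears to be a numpy issue rather than a PuLP one: `np.empty(3)` allocates a float64 array, so assigning an `LpConstraint` into it requires a float conversion, which fails. A small sketch (my addition, not in the notebook) showing that an object-dtype array holds the constraints just as a list does:

import numpy as np
import pulp

a = pulp.LpVariable('a', 0, 1)
b = pulp.LpVariable('b', 0, 1)
c_s = np.empty(3, dtype=object)   # object dtype: elements are stored as-is
c_s[0] = a >= 0
c_s[1] = b >= 0.1
c_s[2] = a + b == 0.5
# each element is still a pulp constraint and can be added to an LpProblem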
In [2]:
# LP is not very sensitive to beta
b = np.linspace(0.9, 0.999,100)
plot_beta(20,100,b)
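One plausible reason (my reading, not stated in the notebook): the LP has the same number of variables and constraints for every beta, whereas the number of value-iteration steps needed for a fixed tolerance grows roughly like log(eps)/log(beta) by the usual contraction argument. A quick back-of-the-envelope check:

import numpy as np

eps = 1e-6
# rough upper bound on contraction iterations needed for tolerance eps
iters = {beta: int(np.ceil(np.log(eps) / np.log(beta)))
         for beta in (0.9, 0.99, 0.999)}
# roughly {0.9: 132, 0.99: 1375, 0.999: 13809}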
In [3]:
# pulp
# Also collect the other timing utilities here
# Compute sigma with compute_greedy
%matplotlib inline
import matplotlib.pyplot as plt
import pulp
import quantecon as qe
import numpy as np
import time
from gurobipy import *
class SimpleOG(object):

    def __init__(self, B=10, M=5, alpha=0.5, beta=0.9):
        self.B, self.M, self.alpha, self.beta = B, M, alpha, beta
        self.n = B + M + 1
        self.m = M + 1
        self.R = np.empty((self.n, self.m))
        self.Q = np.zeros((self.n, self.m, self.n))
        self.populate_Q()
        self.populate_R()

    def u(self, c):
        return c**self.alpha

    def populate_R(self):
        for s in range(self.n):
            for a in range(self.m):
                self.R[s, a] = self.u(s - a) if a <= s else -np.inf

    def populate_Q(self):
        for a in range(self.m):
            self.Q[:, a, a:(a + self.B + 1)] = 1.0 / (self.B + 1)
# solve by linear programming
def LP_approach(setting):
    num_state = range(1, setting.num_states + 1)
    num_action = range(1, setting.num_actions + 1)
    LP = pulp.LpProblem('LP_Approach', pulp.LpMinimize)
    value_functions = pulp.LpVariable.dicts('value', num_state, 0, 50000, 'Continuous')
    # set constraints
    dimension = setting.num_states * setting.num_actions
    c_s = [None] * dimension
    count = 0
    for i in num_state:
        for j in num_action:
            c = (value_functions[i]
                 - setting.beta * pulp.lpSum(setting.Q[i-1, j-1, k-1] * value_functions[k]
                                             for k in num_state)
                 >= setting.R[i-1, j-1])
            LP += c
            c_s[count] = c
            count += 1
    # set objective
    LP += pulp.lpSum(value_functions[i] for i in num_state)
    # solve
    LP.solve(pulp.GUROBI(msg=False))
    # values
    v = np.empty(setting.num_states)
    for i in num_state:
        v[i - 1] = value_functions[i].value()
    # sigma
    sigma = setting.compute_greedy(v)
    # result
    res = qe.markov.ddp.DPSolveResult(v=v, sigma=sigma,
                                      mc=setting.controlled_mc(sigma),
                                      method='Exact Linear Programming',
                                      )
    return res
# linear programming time
def elapse_LP(setting):
    start = time.time()
    LP_approach(setting)
    return time.time() - start

# value function time
def elapse_value(setting):
    start = time.time()
    setting.solve(method='value_iteration')
    elapsed_time = time.time() - start
    return elapsed_time

# plot time up to num_states = n
def plot_graph(n, discount):
    a = [None] * (n - 9)
    b = [None] * (n - 9)
    for i in range(10, n + 1):
        h = SimpleOG(M=i, beta=discount)
        d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
        a[i - 10] = elapse_LP(d)
        b[i - 10] = elapse_value(d)
    plt.plot(a)
    plt.plot(b)
# LP time as beta changes
def plot_beta_LP(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B=shock, M=state, beta=value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_LP(ddp)
    plt.plot(n)

# value-iteration time as beta changes
def plot_beta_value(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B=shock, M=state, beta=value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_value(ddp)
    plt.plot(n)
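For reference, `compute_greedy(v)` returns the policy that is greedy with respect to the computed values. A minimal hand-rolled equivalent (my sketch, assuming ties are broken toward the smallest action index, as `argmax` does):

import numpy as np

def greedy_policy(R, Q, beta, v):
    # action values: R[s, a] + beta * sum_k Q[s, a, k] * v[k]
    vals = R + beta * Q.dot(v)      # shape (num_states, num_actions)
    return vals.argmax(axis=1)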
In [ ]:
h = SimpleOG()
d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
res = LP_approach(d)
res