In [1]:
# pulp
# very slow because a Python list is used to compute sigma
%matplotlib inline
import matplotlib.pyplot as plt
import pulp
import quantecon as qe
import numpy as np
import time
from gurobipy import *

class SimpleOG(object):

    def __init__(self, B=10, M=5, alpha=0.5, beta=0.9):

        self.B, self.M, self.alpha, self.beta  = B, M, alpha, beta
        self.n = B + M + 1
        self.m = M + 1

        self.R = np.empty((self.n, self.m))
        self.Q = np.zeros((self.n, self.m, self.n))

        self.populate_Q()
        self.populate_R()

    def u(self, c):
        return c**self.alpha

    def populate_R(self):
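        """Reward array: consuming c = s - a yields u(c); actions a > s are infeasible (-inf)."""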

        for s in range(self.n):
            for a in range(self.m):
                self.R[s, a] = self.u(s - a) if a <= s else -np.inf

    def populate_Q(self):
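        """Transition array: next state = a (amount saved) + shock, shock uniform on {0, ..., B}."""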

        for a in range(self.m):
            self.Q[:, a, a:(a + self.B + 1)] = 1.0 / (self.B + 1)


def LP_approach(setting):
    num_state = range(1, setting.num_states + 1)
    num_action = range(1, setting.num_actions + 1)
    LP = pulp.LpProblem('LP_Approach', pulp.LpMinimize)

    value_functions = pulp.LpVariable.dicts('value', num_state, 0, 50000, 'Continuous')
    
    # set constraints
    dimension = setting.num_states * setting.num_actions
    c_s = [None] * dimension
    count = 0
    for i in num_state:
        for j in num_action:
            c = value_functions[i] - \
                setting.beta * (pulp.lpSum(setting.Q[i-1, j-1, k-1] * value_functions[k] for k in num_state)) \
                >= setting.R[i-1, j-1]
            LP += c
            c_s[count] = c
            count += 1
    
    # set objective
    LP += pulp.lpSum(value_functions[i] for i in num_state)
    
    # solve
    LP.solve(pulp.GUROBI(msg = False))
    
    # sigma: read off the dual value (.pi) of each constraint; a positive dual means
    # the constraint for that (state, action) pair is binding, so that action is optimal.
    # if several actions are optimal, the one with the largest index is kept
    pi_list = [c.pi for c in c_s]
    # group the flat list of duals into one chunk of num_actions per state
    pi_statedivision = zip(*[iter(pi_list)] * setting.num_actions)
    pi_state_list = [list(i) for i in pi_statedivision]
    sigma = [None] * setting.num_states
    for i, actions in enumerate(pi_state_list):
        for j, value in enumerate(actions):
            if value > 0:
                sigma[i] = j
    
    
    # values
    v = np.empty(setting.num_states)
    # index by state key so the order does not depend on dict iteration order
    for i in num_state:
        v[i - 1] = value_functions[i].value()
    
    res = qe.markov.ddp.DPSolveResult(v=v, sigma = sigma,
                            method='Exact Linear Programming',
                            )
    return res
    
def elapse_LP(setting):
    start = time.time()
    LP_approach(setting)
    return time.time() - start

def elapse_value(setting):
    start = time.time()
    setting.solve(method='value_iteration')
    elapsed_time = time.time() - start
    return elapsed_time

def plot_graph(n, discount):
    a = [None] * (n-9)
    b = [None] * (n-9)
    for i in range(10, n+1):
        h = SimpleOG(M = i, beta = discount)
        d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
        a[i - 10] = elapse_LP(d)
        b[i - 10] = elapse_value(d)

    plt.plot(a)
    plt.plot(b)
    
def plot_beta(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B = shock, M = state, beta = value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_LP(ddp)
    plt.plot(n)
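
In [ ]:
# A minimal sanity-check sketch (not part of the original run): LP_approach solves
#   min  sum_s v(s)
#   s.t. v(s) - beta * sum_k Q[s,a,k] * v(k) >= R[s,a]   for every (s, a),
# and at the optimum the constraint of the greedy action binds, which is why sigma
# can be read off the constraint duals above. Assuming the objects defined in this
# notebook, both facts can be checked numerically:
h = SimpleOG()
d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
res = LP_approach(d)
v = res.v

# the Bellman inequality holds at the LP solution for every (state, action) pair
lhs = v[:, np.newaxis] - d.beta * np.einsum('sak,k->sa', d.Q, v)
print np.all(lhs >= d.R - 1e-6)

# the dual-based policy agrees with the greedy policy at v (ties aside)
print np.array_equal(np.asarray(res.sigma), d.compute_greedy(v))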

In [41]:
# actually solve the model
# the result matches the reference solution below
h = SimpleOG()
d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
res = LP_approach(d)
res


Out[41]:
  sigma: [0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 5]
 method: 'Exact Linear Programming'
      v: array([ 19.01740222,  20.01740222,  20.43161578,  20.74945302,
        21.04078099,  21.30873018,  21.54479816,  21.76928181,
        21.98270358,  22.18824323,  22.3845048 ,  22.57807736,
        22.76109127,  22.94376708,  23.11533996,  23.27761762])

In [42]:
# reference solution
d.solve()


Out[42]:
       mc: Markov chain with transition matrix 
P = 
[[ 0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.          0.
   0.          0.          0.        ]
 [ 0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.          0.
   0.          0.          0.        ]
 [ 0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.          0.
   0.          0.          0.        ]
 [ 0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.          0.
   0.          0.          0.        ]
 [ 0.          0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.          0.          0.          0.        ]
 [ 0.          0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.          0.          0.          0.        ]
 [ 0.          0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.          0.          0.          0.        ]
 [ 0.          0.          0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.          0.          0.        ]
 [ 0.          0.          0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.          0.          0.        ]
 [ 0.          0.          0.          0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.          0.        ]
 [ 0.          0.          0.          0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.          0.        ]
 [ 0.          0.          0.          0.          0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.        ]
 [ 0.          0.          0.          0.          0.          0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909]
 [ 0.          0.          0.          0.          0.          0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909]
 [ 0.          0.          0.          0.          0.          0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909]
 [ 0.          0.          0.          0.          0.          0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909  0.09090909  0.09090909
   0.09090909  0.09090909  0.09090909  0.09090909]]
        v: array([ 19.01740222,  20.01740222,  20.43161578,  20.74945302,
        21.04078099,  21.30873018,  21.54479816,  21.76928181,
        21.98270358,  22.18824323,  22.3845048 ,  22.57807736,
        22.76109127,  22.94376708,  23.11533996,  23.27761762])
 max_iter: 250
    sigma: array([0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 5])
 num_iter: 3
   method: 'policy iteration'

In [45]:
# vertical axis: seconds
# blue is LP, green is value iteration
# LP is very slow...
plot_graph(50, 0.98)



In [16]:
# can constraints be stored in a Python list?
# yes, they can
import pulp

problem = pulp.LpProblem('sample', pulp.LpMinimize)

a = pulp.LpVariable('a', 0, 1)
b = pulp.LpVariable('b', 0, 1)

problem += a + b

c_1 = a >= 0
c_2 = b >= 0.1
c_3 = a + b == 0.5

c_s = [c_1, c_2, c_3]

for i in c_s:
    problem += i

problem.solve()

print problem

print "Result"
print "a", a.value()
print "b", b.value()
print [lagrange.pi for lagrange in c_s]


sample:
MINIMIZE
1*a + 1*b + 0
SUBJECT TO
_C1: a >= 0

_C2: b >= 0.1

_C3: a + b = 0.5

VARIABLES
a <= 1 Continuous
b <= 1 Continuous

Result
a 0.4
b 0.1
[0.0, 0.0, 1.0]

In [17]:
c_1


Out[17]:
1*a + 0 >= 0

In [1]:
# can constraints be stored in a numpy array?
# not with the default (float) dtype; see the sketch after the traceback
import pulp
import numpy as np

problem = pulp.LpProblem('sample', pulp.LpMinimize)

a = pulp.LpVariable('a', 0, 1)
b = pulp.LpVariable('b', 0, 1)

problem += a + b

c_1 = a >= 0
c_2 = b >= 0.1
c_3 = a + b == 0.5

c_s = np.empty(3)
c_s[0] = c_1
c_s[1] = c_2
c_s[2] = c_3

for i in c_s:
    problem += i

problem.solve()

print problem

print "Result"
print "a", a.value()
print "b", b.value()
print [lagrange.pi for lagrange in c_s]


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-1f0efe068d9e> in <module>()
     16 
     17 c_s = np.empty(3)
---> 18 c_s[0] = c_1
     19 c_s[1] = c_2
     20 c_s[2] = c_3

TypeError: float() argument must be a string or a number
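
In [ ]:
# The error above is a dtype problem rather than a pulp one: np.empty(3) allocates a
# float64 array, so assigning an LpConstraint object raises TypeError. A minimal,
# untested sketch using an object-dtype array instead (reusing problem and c_1..c_3
# from the cell above):
c_s = np.empty(3, dtype=object)
c_s[0] = c_1
c_s[1] = c_2
c_s[2] = c_3

for i in c_s:
    problem += i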

In [2]:
# LP is not very sensitive to beta
b = np.linspace(0.9, 0.999,100)
plot_beta(20,100,b)



In [3]:
# pulp
# also gathers the other timing helpers
# sigma is obtained with compute_greedy instead of constraint duals

%matplotlib inline
import matplotlib.pyplot as plt
import pulp
import quantecon as qe
import numpy as np
import time
from gurobipy import *

class SimpleOG(object):

    def __init__(self, B=10, M=5, alpha=0.5, beta=0.9):

        self.B, self.M, self.alpha, self.beta  = B, M, alpha, beta
        self.n = B + M + 1
        self.m = M + 1

        self.R = np.empty((self.n, self.m))
        self.Q = np.zeros((self.n, self.m, self.n))

        self.populate_Q()
        self.populate_R()

    def u(self, c):
        return c**self.alpha

    def populate_R(self):

        for s in range(self.n):
            for a in range(self.m):
                self.R[s, a] = self.u(s - a) if a <= s else -np.inf

    def populate_Q(self):

        for a in range(self.m):
            self.Q[:, a, a:(a + self.B + 1)] = 1.0 / (self.B + 1)


# solve by linear programming
def LP_approach(setting):
    num_state = range(1, setting.num_states + 1)
    num_action = range(1, setting.num_actions + 1)
    LP = pulp.LpProblem('LP_Approach', pulp.LpMinimize)

    value_functions = pulp.LpVariable.dicts('value', num_state, 0, 50000, 'Continuous')
    
    # set constraints
    dimension = setting.num_states * setting.num_actions
    c_s = [None] * dimension
    count = 0
    for i in num_state:
        for j in num_action:
            c = value_functions[i] - \
                   setting.beta * (pulp.lpSum(setting.Q[i-1, j-1, k-1] * value_functions[k] for k in num_state))\
                   >= setting.R[i-1,j-1]
            LP += c
            c_s[count] = c
            count += 1
    
    # set objective
    LP += pulp.lpSum(value_functions[i] for i in num_state)
    
    # solve
    LP.solve(pulp.GUROBI(msg = False))
    
    # values
    v = np.empty(setting.num_states)
    # index by state key so the order does not depend on dict iteration order
    for i in num_state:
        v[i - 1] = value_functions[i].value()
    
    # sigma
    sigma = setting.compute_greedy(v)
    
    # result
    res = qe.markov.ddp.DPSolveResult(v=v, sigma = sigma,
                            mc=setting.controlled_mc(sigma),
                            method = 'Exact Linear Programming',
                            )
    return res
    
# linear programming time
def elapse_LP(setting):
    start = time.time()
    LP_approach(setting)
    return time.time() - start

# value function time
def elapse_value(setting):
    start = time.time()
    setting.solve(method='value_iteration')
    elapsed_time = time.time() - start
    return elapsed_time

# plot time until num_state = n
def plot_graph(n, discount):
    a = [None] * (n-9)
    b = [None] * (n-9)
    for i in range(10, n+1):
        h = SimpleOG(M = i, beta = discount)
        d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
        a[i - 10] = elapse_LP(d)
        b[i - 10] = elapse_value(d)

    plt.plot(a)
    plt.plot(b)
    
# time changing beta LP
def plot_beta_LP(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B = shock, M = state, beta = value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_LP(ddp)
    plt.plot(n)
    
#time changing beta value function
def plot_beta_value(shock, state, betas):
    n = [None] * len(betas)
    for i, value in enumerate(betas):
        model = SimpleOG(B = shock, M = state, beta = value)
        ddp = qe.markov.DiscreteDP(model.R, model.Q, model.beta)
        n[i] = elapse_value(ddp)
    plt.plot(n)

In [ ]:
h = SimpleOG()
d = qe.markov.DiscreteDP(h.R, h.Q, h.beta)
res = LP_approach(d)
res
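
In [ ]:
# a minimal usage sketch (not run here) for the beta-timing helpers defined above,
# on the same grid of discount factors used earlier with plot_beta:
betas = np.linspace(0.9, 0.999, 100)
plot_beta_LP(20, 100, betas)
plot_beta_value(20, 100, betas)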