In [82]:
import numpy as np
from fractions import Fraction
In [154]:
# Exact-arithmetic variant of the same setup, kept for reference:
#   value = [Fraction(0, 1) for _ in range(101)]
#   policy = [1 for _ in range(101)]
#   ph = Fraction(2, 5)
#   value[100] = Fraction(1, 1)

# Floating-point tables indexed 0..100.
# NOTE(review): this looks like the gambler's problem (capital 0..100, goal at
# 100, ph = probability of winning a bet) -- confirm.
ph = 0.1                              # weight applied to value[i + j] in the backups
policy = np.full(101, 1, dtype=int)   # stake stored per state, initialised to 1
value = np.zeros(101)                 # value estimate per state
value[-1] = 1.0                       # last entry (index 100) is pinned to 1
print(value, policy)
In [155]:
def update(values=None, stakes=None, win_prob=None, rel_tol=1e-9):
    """Run one in-place sweep of one-step backups over the interior states.

    For every state ``i`` (visited from high to low) and every stake ``j`` in
    ``1 .. min(i, top - i)``, the backup
    ``(1 - win_prob) * values[i - j] + win_prob * values[i + j]`` is computed.
    A strictly better backup replaces the stored value and stake. A backup
    that ties with the stored value moves the stake to the smaller ``j``.
    Writes happen in place, so states processed later in the same sweep read
    entries already updated earlier in that sweep.

    NOTE(review): this matches value iteration for the gambler's problem
    (Sutton & Barto, Example 4.3) -- presumably that is the intent.

    Args:
        values: Mutable sequence of value estimates; the first and last
            entries are only read. Defaults to the notebook-level ``value``.
        stakes: Mutable sequence holding the chosen stake for each state.
            Defaults to the notebook-level ``policy``.
        win_prob: Weight on ``values[i + j]``. Defaults to the notebook-level
            ``ph``.
        rel_tol: Relative tolerance within which two backups count as a tie.

    Returns:
        1 if any entry of ``values`` or ``stakes`` was modified, else 0.
    """
    # Fall back to the notebook globals so the zero-argument call keeps working.
    if values is None:
        values = value
    if stakes is None:
        stakes = policy
    if win_prob is None:
        win_prob = ph

    top = len(values) - 1  # index of the last state (100 for the notebook's tables)
    changed = 0
    for i in range(top - 1, 0, -1):
        for j in range(1, min(i, top - i) + 1):
            val = (1 - win_prob) * values[i - j] + win_prob * values[i + j]
            # The tie window scales with the magnitudes being compared. The
            # previous fixed window of 1e-4 was tested before the improvement
            # check, so any improvement smaller than 1e-4 was discarded and
            # states whose value is below 1e-4 never left 0.
            slack = rel_tol * max(abs(val), abs(values[i]))
            if val > values[i] + slack:
                values[i] = val
                stakes[i] = j
                changed = 1
            elif val >= values[i] - slack and j < stakes[i]:
                # Tie: prefer the smaller stake, but never lower the stored value.
                if val > values[i]:
                    values[i] = val
                stakes[i] = j
                changed = 1
    return changed
def print_q(x):
    """Print the stored entry for state ``x`` and the backup for each stake.

    The first line printed is ``value[x]`` and ``policy[x]``. Each following
    line shows a stake ``s`` in ``1 .. min(x, 100 - x)`` and the quantity
    ``(1 - ph) * value[x - s] + ph * value[x + s]``.

    Args:
        x: Index of the state to inspect.
    """
    print(value[x], policy[x])
    largest_stake = min(x, 100 - x)
    stake = 1
    while stake <= largest_stake:
        lose_term = (1 - ph) * value[x - stake]
        win_term = ph * value[x + stake]
        print(stake, lose_term + win_term)
        stake += 1
In [156]:
# Sweep with update() until it reports that nothing changed, capped at 100
# sweeps. update() is deterministic, so once a sweep changes nothing every
# later sweep would be a no-op; stopping early yields the same tables.
for sweep in range(100):
    if not update():
        break
# Index 0 is skipped in the printout.
print(value[1:], policy[1:])
In [158]:
# Inspect state 51: print its stored value and stake, then the backup for each
# stake that print_q enumerates for it.
print_q(51)
In [96]:
def update1():
    """Run one synchronous sweep of backups and rebind the global ``value``.

    All backups read the table as it was when the sweep started; results are
    written to a copy, which replaces ``value`` at the end. For each state in
    ``1 .. 99`` and each stake in ``1 .. min(state, 100 - state)``, the backup
    ``(1 - ph) * value[state - stake] + ph * value[state + stake]`` is
    accepted when it is strictly larger than the copy's current entry, or
    exactly equal to it while the stake is larger than ``policy[state]``.
    ``policy`` is updated in place.
    """
    global value
    fresh = np.copy(value)
    for state in range(1, 100):
        stake_limit = min(state, 100 - state)
        for stake in range(1, stake_limit + 1):
            backed_up = (1 - ph) * value[state - stake] + ph * value[state + stake]
            improves = backed_up > fresh[state]
            # On an exact tie the larger stake wins.
            if not improves and not (backed_up == fresh[state] and stake > policy[state]):
                continue
            fresh[state] = backed_up
            policy[state] = stake
    value = fresh
# Apply update1() one hundred times, then print the complete value and policy tables.
remaining = 100
while remaining > 0:
    update1()
    remaining -= 1
print(value, policy)
In [66]: