In [82]:
import numpy as np
from fractions import Fraction

In [154]:
#value = [Fraction(0,1) for _ in range(101)]
#policy = [1 for _ in range(101)]
#ph = Fraction(2,5)
#value[100] = Fraction(1,1)
value = np.zeros(101)
policy = np.ones(101, dtype=int)
ph = 0.1
value[100] = 1.0
print(value, policy)


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1.] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

In [155]:
def update():
    flag = 0
    for i in range(99, 0, -1):
        for j in range(1, min(i+1, 100 - i + 1)):
            val = (1-ph) * value[i - j] + ph * value[i + j]
            if abs(val - value[i]) < 0.0001:
                if j < policy[i]:
                    value[i] = val
                    policy[i] = j
                    flag = 1
            elif val > value[i] :
                value[i] = val
                policy[i] = j
                flag = 1
    return flag

def print_q(x):
    print(value[x], policy[x])
    for i in range(1, min(x,100-x)+1):
        print(i, (1-ph) * value[x-i] + ph * value[x+i])

In [156]:
for i in range(100):
    update()
print(value[1:], policy[1:])


[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.00000000e-05 1.00000000e-04
 1.00000000e-04 1.00000000e-04 1.90000000e-04 3.43900000e-04
 1.00000000e-03 1.00000900e-03 1.00009000e-03 1.00090000e-03
 1.01988559e-03 1.19075590e-03 1.90000000e-03 1.90810000e-03
 1.98100000e-03 2.71000000e-03 2.78290000e-03 3.43900000e-03
 1.00000000e-02 1.00000009e-02 1.00000090e-02 1.00000900e-02
 1.00009000e-02 1.00900810e-02 1.02439000e-02 1.09000000e-02
 1.09081000e-02 1.10539000e-02 1.17100000e-02 1.24390000e-02
 1.30951000e-02 1.90000000e-02 1.90081000e-02 1.90810000e-02
 1.98100000e-02 1.99485100e-02 2.11951000e-02 2.71000000e-02
 2.71729000e-02 2.80068031e-02 3.43900000e-02 3.51152464e-02
 4.16424638e-02 1.00000000e-01 1.00000001e-01 1.00000009e-01
 1.00000090e-01 1.00000900e-01 1.00090081e-01 1.00243900e-01
 1.00900000e-01 1.00908100e-01 1.01053900e-01 1.01710000e-01
 1.02439000e-01 1.03095100e-01 1.09000000e-01 1.09008100e-01
 1.09081000e-01 1.09810000e-01 1.09948510e-01 1.11195100e-01
 1.17100000e-01 1.17172900e-01 1.18006803e-01 1.24390000e-01
 1.25115246e-01 1.31642464e-01 1.90000000e-01 1.90000810e-01
 1.90008100e-01 1.90278559e-01 1.90810000e-01 1.91539000e-01
 1.92785590e-01 1.98100000e-01 1.98829000e-01 2.00075590e-01
 2.05514659e-01 2.11951000e-01 2.18516959e-01 2.71000000e-01
 2.71250703e-01 2.72385100e-01 2.78290000e-01 2.80068031e-01
 2.90755900e-01 3.43900000e-01 3.45146590e-01 3.52061228e-01
 4.09510000e-01 4.16855105e-01 4.75169595e-01 1.00000000e+00] [ 1  1  1  1  1  1  1  1  1  1  2  1 12  1  1  1  1  1  6  1  1  3  1  1
 25  1  1  1  1  2  1  6  1  2  3 11  1 12  1  1  3  1  7  6  1  4  3  1
  1 50  1  1  1  1  2  1  6  1  2  3 11  1 12  1  1  3  1  7  6  1  4  3
  1  1 25  1  1  3  3  5  6  6  8  9 10 11 12 12 11 10  3  8  7  6  5  4
  3  2  1  1]

In [158]:
print_q(51)


0.10000000090000001 1
1 0.10000000090000001
2 0.04747822641100001
3 0.041603811741100005
4 0.0409600081
5 0.03523051279
6 0.034545610000000004
7 0.03448081000000001
8 0.029180980000000006
9 0.02812465900000001
10 0.028072900000000005
11 0.027482410000000006
12 0.028007290000000004
13 0.028000810000000008
14 0.02269369000000001
15 0.022176100000000004
16 0.021533851000000007
17 0.021068020000000007
18 0.021527290000000008
19 0.021527290000000004
20 0.021020190310000006
21 0.021520072900000006
22 0.021512334637900008
23 0.02216432737900001
24 0.028000008100000005
25 0.028000081810000008
26 0.028000810000000008
27 0.0221229559
28 0.02158561
29 0.0215929
30 0.021061459
31 0.021527290000000004
32 0.021592900000000005
33 0.021079239310000004
34 0.021469362931000005
35 0.022095910000000007
36 0.022751776945990003
37 0.0280000081
38 0.02802507031
39 0.027548020000000006
40 0.028000000000000008
41 0.0280968031
42 0.02916559000000001
43 0.034480000000000004
44 0.034523659000000005
45 0.03520612279000001
46 0.04095100000000001
47 0.041685510511
48 0.047516959459900004
49 0.1

In [96]:
def update1():
    global value
    newval = np.copy(value)
    for i in range(1,100):
        for j in range(1, min(i, 100-i)+1):
            val = (1-ph) * value[i - j] + ph * value[i + j]
            if val > newval[i] or val == newval[i] and j > policy[i]:
                newval[i] = val
                policy[i] = j
    value = newval
for i in range(100):
    update1()
print(value, policy)


[0.         0.00206562 0.00516406 0.00922547 0.01291015 0.0173854
 0.02306368 0.02781411 0.03227539 0.03768507 0.0434635  0.05035447
 0.05765919 0.06523937 0.06953528 0.07443124 0.08068847 0.08661104
 0.09421268 0.10314362 0.10865874 0.11596663 0.12588617 0.13357998
 0.14414799 0.16       0.16309844 0.16774609 0.17383821 0.17936523
 0.1860781  0.19459552 0.20172117 0.20841308 0.21652761 0.22519525
 0.2355317  0.24648879 0.25785906 0.26430292 0.27164686 0.2810327
 0.28991657 0.30131902 0.31471544 0.32298812 0.33394994 0.34882926
 0.36036996 0.37622198 0.4        0.40309844 0.40774609 0.41383821
 0.41936523 0.4260781  0.43459552 0.44172117 0.44841308 0.45652761
 0.46519525 0.4755317  0.48648879 0.49785906 0.50430292 0.51164686
 0.5210327  0.52991657 0.54131902 0.55471544 0.56298812 0.57394994
 0.58882926 0.60036996 0.61622198 0.64       0.64464766 0.65161914
 0.66075731 0.66904785 0.67911715 0.69189327 0.70258175 0.71261962
 0.72479141 0.73779287 0.75329756 0.76973319 0.78678859 0.79645439
 0.80747029 0.82154905 0.83487485 0.85197853 0.87207316 0.88448217
 0.90092491 0.92324389 0.94055495 0.96433297 1.        ] [ 1.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23.  1. 25. 26. 27. 28. 29.  5. 31. 32. 33. 16. 35.
 14. 37. 12. 39. 10. 41. 42. 43.  6.  5. 46.  3. 48. 49. 50.  1. 48. 47.
 46. 45. 44.  7. 42. 41. 40. 39. 38. 37. 14. 35. 34. 33. 32. 31. 30. 29.
 28. 27. 24. 25. 24. 23. 22. 21. 20. 19. 18. 17. 16. 15. 14. 13. 12. 11.
 10.  9.  8.  7.  6.  5.  4.  3.  2.  1.  1.]

In [66]:



1 0.010000000000000007