In [1]:
from GridWorld import GridWorld, RandomPolicy

Iterative policy evaluation for the equiprobable random policy on the 4×4 gridworld — reproduces the left side of Figure 4.1 in Sutton & Barto, *Reinforcement Learning: An Introduction*.


In [2]:
# World configuration: a 4x4 gridworld.
world_size = (4, 4)

# Special-state table; each entry is (state, successor, actions).
# NOTE(review): a successor of [-1, -1] appears to mark the state as
# terminal (its value stays 0 in the output below) -- confirm against
# the GridWorld implementation.
special_state = [([0, 0], [-1, -1], range(4)), ([3, 3], [-1, -1], range(4))]

# Initialize grid world
world = GridWorld(world_size, special_state)

# Equiprobable random policy.
# NOTE(review): the meaning of the argument `1` is not visible here --
# confirm against RandomPolicy.
policy = RandomPolicy(1)

# Iterative policy evaluation: sweep the state values until the change
# between sweeps drops below the convergence tolerance.
CONVERGENCE_TOL = 1e-4  # was an inline magic number in the loop condition
iteration = 0   # number of sweeps performed (diagnostic)
diffs = []      # per-sweep value change, kept for convergence inspection
while not (world.diff < CONVERGENCE_TOL):
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)

# Show the converged state-value matrix.
# NOTE(review): the arguments (3, 5) presumably control the printed
# precision/field width -- confirm against GridWorld.show_value.
world.show_value(3, 5)


|--------+--------+--------+--------|
|   0.000| -14.000| -20.000| -22.000|
|--------+--------+--------+--------|
| -14.000| -18.000| -20.000| -20.000|
|--------+--------+--------+--------|
| -20.000| -20.000| -18.000| -14.000|
|--------+--------+--------+--------|
| -22.000| -20.000| -14.000|   0.000|
|--------+--------+--------+--------|

Assume that the transitions from the original states are unchanged.


In [3]:
# World configuration: the 4x4 grid extended with a fifth row that
# holds the new state 15 at position (4, 1) (Exercise 4.2).
world_size = (5, 4)

# Special-state table; each entry is (state, successor, actions).
# - ([0,0], ...) and ([3,3], ...) are the two original terminal corners.
# - (4,0), (4,2), (4,3) are mapped to [-1, -1] so only (4,1) — the new
#   state 15 — is active in the extra row.
#   NOTE(review): [-1, -1] appears to mark a terminal/inactive state
#   (value stays 0 in the output) -- confirm against GridWorld.
# - The four ([4,1], ...) entries give state 15 its dynamics:
#   left -> state 12, down -> state 15 itself, up -> state 13, right -> state 14.
# - ([3,1], [3,1], [1]) keeps the ORIGINAL dynamics of state 13:
#   action `down` leaves the agent in state 13, as required by
#   "transitions from the original states are unchanged".
special_state = [([0, 0], [-1, -1], range(4)), ([3, 3], [-1, -1], range(4)), ([4, 0], [-1, -1], range(4)),
                 ([4, 2], [-1, -1], range(4)), ([4, 3], [-1, -1], range(4)), ([4, 1], [3, 1], [0]),
                 ([4, 1], [4, 1], [1]), ([4, 1], [3, 0], [2]), ([4, 1], [3, 2], [3]), ([3, 1], [3, 1], [1])]

# Initialize grid world
world = GridWorld(world_size, special_state)

# Equiprobable random policy (see NOTE on the argument in the first cell's
# RandomPolicy call -- confirm its meaning against the class).
policy = RandomPolicy(1)

# Iterative policy evaluation: sweep until the per-sweep value change
# drops below the convergence tolerance.
CONVERGENCE_TOL = 1e-4  # was an inline magic number in the loop condition
iteration = 0   # number of sweeps performed (diagnostic)
diffs = []      # per-sweep value change, kept for convergence inspection
while not (world.diff < CONVERGENCE_TOL):
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)

# Show the converged state-value matrix.
# NOTE(review): arguments presumably control precision/field width --
# confirm against GridWorld.show_value.
world.show_value(3, 5)


|--------+--------+--------+--------|
|   0.000| -11.507| -16.744| -18.693|
|--------+--------+--------+--------|
| -10.008| -13.777| -16.032| -16.642|
|--------+--------+--------+--------|
| -12.247| -13.563| -12.963| -11.202|
|--------+--------+--------+--------|
|  -9.170| -11.263|  -7.057|   0.000|
|--------+--------+--------+--------|
|   0.000| -10.496|   0.000|   0.000|
|--------+--------+--------+--------|

Now suppose the dynamics of state 13 are also changed, such that action down from state 13 takes the agent to the new state 15.


In [4]:
# World configuration: same 5x4 grid with state 15 at (4, 1), but now
# with CHANGED dynamics for state 13 — action `down` from state 13
# takes the agent to state 15 (second part of Exercise 4.2).
world_size = (5, 4)

# Special-state table; each entry is (state, successor, actions).
# Identical to the previous cell EXCEPT that the ([3,1], [3,1], [1])
# entry is omitted: without that override, `down` from state 13 falls
# through to the default grid dynamics and moves to (4, 1) = state 15.
# NOTE(review): [-1, -1] as successor appears to mark a terminal/inactive
# state (value stays 0 in the output) -- confirm against GridWorld.
special_state = [([0, 0], [-1, -1], range(4)), ([3, 3], [-1, -1], range(4)), ([4, 0], [-1, -1], range(4)),
                 ([4, 2], [-1, -1], range(4)), ([4, 3], [-1, -1], range(4)), ([4, 1], [3, 1], [0]),
                 ([4, 1], [4, 1], [1]), ([4, 1], [3, 0], [2]), ([4, 1], [3, 2], [3])]

# Initialize grid world
world = GridWorld(world_size, special_state)

# Equiprobable random policy (see NOTE on the argument in the first cell's
# RandomPolicy call -- confirm its meaning against the class).
policy = RandomPolicy(1)

# Iterative policy evaluation: sweep until the per-sweep value change
# drops below the convergence tolerance.
CONVERGENCE_TOL = 1e-4  # was an inline magic number in the loop condition
iteration = 0   # number of sweeps performed (diagnostic)
diffs = []      # per-sweep value change, kept for convergence inspection
while not (world.diff < CONVERGENCE_TOL):
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)

# Show the converged state-value matrix.
# NOTE(review): arguments presumably control precision/field width --
# confirm against GridWorld.show_value.
world.show_value(3, 5)


|--------+--------+--------+--------|
|   0.000| -11.446| -16.665| -18.614|
|--------+--------+--------+--------|
|  -9.924| -13.672| -15.937| -16.562|
|--------+--------+--------+--------|
| -12.101| -13.380| -12.847| -11.137|
|--------+--------+--------+--------|
|  -9.000| -10.899|  -6.936|   0.000|
|--------+--------+--------+--------|
|   0.000| -10.278|   0.000|   0.000|
|--------+--------+--------+--------|


In [ ]: