In [1]:
from GridWorld import GridWorld, RandomPolicy
The left side of Figure 4.1: iterative policy evaluation of the equiprobable random policy on the 4×4 gridworld, where every transition earns a reward of -1, the task is undiscounted, and the two shaded corner cells are terminal.
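Each sweep applies the iterative policy evaluation update to every nonterminal state. With the equiprobable random policy, reward $-1$ on every transition, and no discounting ($\gamma = 1$), the update is

$$v_{k+1}(s) \;=\; \sum_a \pi(a \mid s) \sum_{s',\,r} p(s', r \mid s, a)\,\bigl[r + \gamma\, v_k(s')\bigr] \;=\; \frac{1}{4} \sum_a \bigl[-1 + v_k(s'_a)\bigr],$$

where $s'_a$ is the cell reached by taking action $a$ from $s$ (the agent stays put when the move would leave the grid).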
In [2]:
# World configuration: 4x4 grid. Each special-state entry is
# (state, successor, actions); the two shaded corners map to the
# terminal sentinel [-1, -1] for all four actions.
world_size = (4, 4)
special_state = [([0, 0], [-1, -1], range(4)),   # terminal corner
                 ([3, 3], [-1, -1], range(4))]   # terminal corner

# Initialize grid world
world = GridWorld(world_size, special_state)

# Policy configuration: equiprobable random policy
policy = RandomPolicy(1)

# Sweep until the largest per-sweep value change drops below the threshold
iteration = 0
diffs = []
while world.diff >= 1e-4:
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)  # record the change for convergence tracking

# Show the converged value matrix
world.show_value(3, 5)
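The source of the local GridWorld module isn't shown here, so the names below (sweep, values, terminals) are illustrative rather than the module's actual API. As a minimal sketch, one synchronous sweep of the kind world.step(policy) presumably performs, assuming deterministic moves, reward -1, and stay-put dynamics at the grid edges, might look like this:

import numpy as np

def sweep(values, terminals, gamma=1.0):
    """One synchronous policy evaluation sweep for the equiprobable
    random policy on a deterministic grid with reward -1 per move.
    Returns the updated values and the largest absolute change."""
    rows, cols = values.shape
    moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right
    new_values = np.zeros_like(values)
    for r in range(rows):
        for c in range(cols):
            if (r, c) in terminals:
                continue  # terminal states keep value 0
            total = 0.0
            for dr, dc in moves:
                nr, nc = r + dr, c + dc
                if not (0 <= nr < rows and 0 <= nc < cols):
                    nr, nc = r, c  # moves off the grid leave the state unchanged
                total += -1.0 + gamma * values[nr, nc]
            # average over the four equiprobable actions
            new_values[r, c] = total / len(moves)
    return new_values, np.max(np.abs(new_values - values))

# Iterating to the same 1e-4 threshold as the cell above:
values = np.zeros((4, 4))
delta = 1.0
while delta >= 1e-4:
    values, delta = sweep(values, terminals={(0, 0), (3, 3)})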
Exercise 4.2: suppose a new state 15 is added to the gridworld just below state 13, and its actions, left, up, right, and down, take the agent to states 12, 13, 14, and 15, respectively. Assume that the transitions from the original states are unchanged.
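Under the equiprobable random policy, the Bellman equation for the new state is then

$$v_\pi(15) = \tfrac{1}{4}\bigl[(-1 + v_\pi(12)) + (-1 + v_\pi(13)) + (-1 + v_\pi(14)) + (-1 + v_\pi(15))\bigr],$$

which the iteration below solves numerically together with the rest of the grid.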
In [3]:
# World configuration: a fifth row holds the new state 15 at [4, 1];
# the other cells of that row are unreachable filler, mapped to the
# terminal sentinel like the two corners.
world_size = (5, 4)
special_state = [([0, 0], [-1, -1], range(4)),   # terminal corner
                 ([3, 3], [-1, -1], range(4)),   # terminal corner
                 ([4, 0], [-1, -1], range(4)),   # filler cell
                 ([4, 2], [-1, -1], range(4)),   # filler cell
                 ([4, 3], [-1, -1], range(4)),   # filler cell
                 ([4, 1], [3, 1], [0]),          # state 15: up    -> state 13
                 ([4, 1], [4, 1], [1]),          # state 15: down  -> state 15
                 ([4, 1], [3, 0], [2]),          # state 15: left  -> state 12
                 ([4, 1], [3, 2], [3]),          # state 15: right -> state 14
                 ([3, 1], [3, 1], [1])]          # state 13: down stays at 13 (original dynamics)

# Initialize grid world
world = GridWorld(world_size, special_state)

# Policy configuration: equiprobable random policy
policy = RandomPolicy(1)

# Sweep until the largest per-sweep value change drops below the threshold
iteration = 0
diffs = []
while world.diff >= 1e-4:
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)

# Show the converged value matrix
world.show_value(3, 5)
Now suppose the dynamics of state 13 are also changed, such that action down from state 13 takes the agent to the new state 15.
In [4]:
# Same world as above, except the ([3, 1], [3, 1], [1]) override is
# dropped: action down from state 13 now follows the default grid
# dynamics into [4, 1], the new state 15.
world_size = (5, 4)
special_state = [([0, 0], [-1, -1], range(4)),   # terminal corner
                 ([3, 3], [-1, -1], range(4)),   # terminal corner
                 ([4, 0], [-1, -1], range(4)),   # filler cell
                 ([4, 2], [-1, -1], range(4)),   # filler cell
                 ([4, 3], [-1, -1], range(4)),   # filler cell
                 ([4, 1], [3, 1], [0]),          # state 15: up    -> state 13
                 ([4, 1], [4, 1], [1]),          # state 15: down  -> state 15
                 ([4, 1], [3, 0], [2]),          # state 15: left  -> state 12
                 ([4, 1], [3, 2], [3])]          # state 15: right -> state 14

# Initialize grid world
world = GridWorld(world_size, special_state)

# Policy configuration: equiprobable random policy
policy = RandomPolicy(1)

# Sweep until the largest per-sweep value change drops below the threshold
iteration = 0
diffs = []
while world.diff >= 1e-4:
    world.step(policy)
    iteration += 1
    diffs.append(world.diff)

# Show the converged value matrix
world.show_value(3, 5)
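The diffs list records the largest per-sweep change but is never displayed. As a small addition (not part of the original cells), a semilog plot with matplotlib makes the convergence rate of the last run visible:

In [5]:
import matplotlib.pyplot as plt

# Plot the recorded per-sweep changes on a log scale; policy evaluation
# typically decays roughly geometrically, which appears as a straight line.
plt.semilogy(range(1, iteration + 1), diffs)
plt.xlabel('sweep')
plt.ylabel('max value change')
plt.title('Convergence of iterative policy evaluation')
plt.show()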