In [49]:
from mdptoolbox.mdp import ValueIteration
from mdptoolbox.example import forest, small
def value_iteration(example, gamma=0.9):
    """Solve an example MDP with value iteration and print the outcome.

    Args:
        example: Zero-argument callable that returns a pair of
            (transition probabilities, rewards), such as ``forest`` or
            ``small`` from ``mdptoolbox.example``.
        gamma: Value forwarded as the third positional argument of
            ``ValueIteration`` (its discount factor). Defaults to 0.9.

    Returns:
        None. The solver's ``policy`` and ``V`` attributes are printed.
    """
    # The example factory supplies both matrices the solver needs.
    transitions, rewards = example()

    # Build the solver, then iterate until it stops on its own criteria.
    solver = ValueIteration(transitions, rewards, gamma)
    solver.run()

    # Report the policy first, then the value function, one per line.
    policy_line = 'Optimal policy : {}'.format(solver.policy)
    print(policy_line)
    value_line = 'Expected value : {}'.format(solver.V)
    print(value_line)
In [50]:
# Solve the `forest` example from mdptoolbox.example with the default gamma
# (0.9) and print the resulting policy and values.
value_iteration(forest)
In [51]:
# Solve the `small` example from mdptoolbox.example with the default gamma
# (0.9) and print the resulting policy and values.
value_iteration(small)