``````

In [3]:

import numpy as np
import pprint
import sys
if "../" not in sys.path:
sys.path.append("../")
from lib.envs.gridworld import GridworldEnv

``````
``````

In [4]:

pp = pprint.PrettyPrinter(indent=2)
env = GridworldEnv()

``````
``````

In [5]:

def value_iteration(env, theta=0.0001, discount_factor=1.0):
"""
Value Iteration Algorithm.

Args:
env: OpenAI environment. env.P represents the transition probabilities of the environment.
theta: Stopping threshold. If the value of all states changes less than theta
in one iteration we are done.
discount_factor: lambda time discount factor.

Returns:
A tuple (policy, V) of the optimal policy and the optimal value function.
"""

V = np.zeros(env.nS)
policy = np.zeros([env.nS, env.nA])

# Implement!
return policy, V

``````
``````

In [6]:

policy, v = value_iteration(env)

print("Policy Probability Distribution:")
print(policy)
print("")

print("Reshaped Grid Policy (0=up, 1=right, 2=down, 3=left):")
print(np.reshape(np.argmax(policy, axis=1), env.shape))
print("")

print("Value Function:")
print(v)
print("")

print("Reshaped Grid Value Function:")
print(v.reshape(env.shape))
print("")

``````
``````

Policy Probability Distribution:
[[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]]

Reshaped Grid Policy (0=up, 1=right, 2=down, 3=left):
[[0 0 0 0]
[0 0 0 0]
[0 0 0 0]
[0 0 0 0]]

Value Function:
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

Reshaped Grid Value Function:
[[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]
[ 0.  0.  0.  0.]]

``````
``````

In [7]:

# Test the value function
expected_v = np.array([ 0, -1, -2, -3, -1, -2, -3, -2, -2, -3, -2, -1, -3, -2, -1,  0])
np.testing.assert_array_almost_equal(v, expected_v, decimal=2)

``````
``````

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-7-55581f8eb5c9> in <module>()
1 # Test the value function
2 expected_v = np.array([ 0, -1, -2, -3, -1, -2, -3, -2, -2, -3, -2, -1, -3, -2, -1,  0])
----> 3 np.testing.assert_array_almost_equal(v, expected_v, decimal=2)

/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/numpy/testing/utils.py in assert_array_almost_equal(x, y, decimal, err_msg, verbose)
914     assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose,
915              header=('Arrays are not almost equal to %d decimals' % decimal),
--> 916              precision=decimal)
917
918

/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/numpy/testing/utils.py in assert_array_compare(comparison, x, y, err_msg, verbose, header, precision)
735                                 names=('x', 'y'), precision=precision)
736             if not cond:
--> 737                 raise AssertionError(msg)
738     except ValueError:
739         import traceback

AssertionError:
Arrays are not almost equal to 2 decimals

(mismatch 87.5%)
x: array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
0.,  0.,  0.])
y: array([ 0, -1, -2, -3, -1, -2, -3, -2, -2, -3, -2, -1, -3, -2, -1,  0])

``````