In [1]:
# simple_rl imports.
from simple_rl.tasks import NavigationWorldMDP
from simple_rl.agents import QLearningAgent
from simple_rl.planning import ValueIteration
from simple_rl.tasks.grid_world.GridWorldStateClass import GridWorldState
from simple_rl.tasks.navigation.NavigationStateClass import NavigationWorldState
# Star import brings in the obstacle helpers (RectangularTile, generate_obstacles)
# used in the last two cells.
from simple_rl.tasks.navigation.NavigationWorldMDP import *
# Python imports.
import itertools
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2
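QLearningAgent and ValueIteration are imported above but not exercised in this notebook. As a minimal sketch of how the planner plugs into a NavigationWorldMDP (assuming simple_rl's usual run_vi()/policy() planner interface; the nvmdp_demo name is ours):

nvmdp_demo = NavigationWorldMDP(width=5, height=5,
                                nav_cell_types=["lightgray"],
                                nav_cell_rewards=[0],
                                nav_cell_p_or_locs=[1.],
                                goal_cell_locs=[[(5, 5)]],
                                goal_cell_rewards=[1.],
                                goal_cell_types=["blue"],
                                slip_prob=0.0, step_cost=0.0, gamma=.95)
vi = ValueIteration(nvmdp_demo)
vi.run_vi()                                    # solve the small MDP
print(vi.policy(nvmdp_demo.get_init_state()))  # greedy action at the start state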
In [2]:
np.random.seed(0)
nvmdp = NavigationWorldMDP(width=30, height=30,
                           nav_cell_types=['lightgray', 'yellow', 'red', 'lime', 'magenta'],
                           nav_cell_rewards=[0, 0, -10, -10, -10],
                           nav_cell_p_or_locs=[0.68, 0.17, 0.05, 0.05, 0.05],
                           goal_cell_locs=[[(21, 21)]],
                           goal_cell_rewards=[1.],
                           goal_cell_types=["blue"],
                           slip_prob=0.00, step_cost=0.0, gamma=.99)
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=8, horizon=100,
    init_states=[NavigationWorldState(2, 2)],
    init_cell_types=["lightgray"], init_unique=True,
    rand_init_to_match_n_traj=True)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c")
Out[2]:
In [3]:
print("Cells: {}\nGoals: {}".format(nvmdp.combined_cell_types, nvmdp.goal_cell_types))
In [4]:
print("Feature format: \n<one-hot:{}>, <distance: {}>, <distance:{}>\n".format(
", ".join(nvmdp.combined_cell_types),
", ".join(nvmdp.combined_cell_types),
", ".join(nvmdp.goal_cell_types)))
In [5]:
sample_states = [(1,1), (1,2), (2,1), (1,8), (21,21)]
list(map(lambda s: nvmdp.feature_at_state(NavigationWorldState(s[0], s[1]),
                                          feature_type="indicator",
                                          incl_cell_distances=True,
                                          incl_goal_indicator=False,
                                          incl_goal_distances=True,
                                          normalize_distance=False,
                                          dtype=np.float64).tolist(),
         sample_states))
Out[5]:
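Per the format printed in In [4], each vector above concatenates a one-hot over the combined cell types, one distance per cell type, and one distance per goal type. A quick dimension check under that assumed layout:

phi = nvmdp.feature_at_state(NavigationWorldState(1, 1),
                             feature_type="indicator",
                             incl_cell_distances=True,
                             incl_goal_indicator=False,
                             incl_goal_distances=True,
                             normalize_distance=False, dtype=np.float64)
n_cells, n_goals = len(nvmdp.combined_cell_types), len(nvmdp.goal_cell_types)
print(len(phi), n_cells + n_cells + n_goals)  # the two should agree if the layout holds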
In [6]:
sample_states = [(1,1), (1,2), (2,1), (1,8), (21,21)]
list(map(lambda s: nvmdp.feature_at_state(NavigationWorldState(s[0], s[1]),
                                          feature_type="indicator",
                                          incl_cell_distances=False,
                                          incl_goal_indicator=False,
                                          incl_goal_distances=True,
                                          normalize_distance=False,
                                          dtype=np.float64).tolist(),
         sample_states))
Out[6]:
In [7]:
np.random.seed(0)
nvmdp = NavigationWorldMDP(width=30, height=30,
                           nav_cell_types=['lightgray', 'yellow', 'red', 'lime', 'magenta'],
                           nav_cell_rewards=[0, 0, -10, -10, -10],
                           nav_cell_p_or_locs=[0.68, 0.17, 0.05, 0.05, 0.05],
                           goal_cell_locs=[[(21, 21)], [(11, 11)]],
                           goal_cell_rewards=[1., 1.2],
                           goal_cell_types=["orange", "blue"],
                           slip_prob=0.00, step_cost=0.0, gamma=.95)
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=16, horizon=100,
    init_states=[NavigationWorldState(2, 2)],
    init_cell_types=["lightgray"], init_unique=True,
    rand_init_to_match_n_traj=True)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c")
## Features: <Cell Type Ind, Goal Distances>
print("Sample State Trajectory")
[nvmdp.feature_at_state(s,
                        feature_type="indicator",
                        incl_cell_distances=False,
                        incl_goal_indicator=False,
                        incl_goal_distances=True,
                        normalize_distance=False,
                        dtype=np.float64).tolist() for s in traj_states_list[0]]
Out[7]:
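For imitation- or IRL-style pipelines, the per-state features of one demonstration stack into a (T, d) matrix. A small helper built only from the calls shown above (the trajectory_features name is ours):

def trajectory_features(mdp, traj):
    # One feature vector per visited state, stacked row-wise.
    return np.array([mdp.feature_at_state(s,
                                          feature_type="indicator",
                                          incl_cell_distances=False,
                                          incl_goal_indicator=False,
                                          incl_goal_distances=True,
                                          normalize_distance=False,
                                          dtype=np.float64)
                     for s in traj])

X = trajectory_features(nvmdp, traj_states_list[0])
print(X.shape)  # (trajectory length, feature dimension)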
In [8]:
nvmdp = NavigationWorldMDP(width=7, height=7,
                           nav_cell_types=['lightgray', 'yellow', 'red'],
                           nav_cell_rewards=[0, 0, -10],
                           nav_cell_p_or_locs=[0.5, 0.5, [(2, i) for i in range(1, 7)]],
                           goal_cell_locs=[[(7, 1)], [(1, 1)]],
                           goal_cell_types=["blue", "orange"],
                           goal_cell_rewards=[1., 10.],
                           slip_prob=0.00, step_cost=0.0, gamma=.50)
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=41, horizon=100,
    init_states=[NavigationWorldState(1, 2)],
    init_cell_types=["lightgray", "yellow"], init_unique=True,
    rand_init_to_match_n_traj=True)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c")
Out[8]:
In [9]:
# Swap the goal rewards from In [8]: the high-reward goal moves from (1,1) to (7,1).
nvmdp._reset_goals([[(7, 1)], [(1, 1)]], [10., 1.], ["blue", "orange"])
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=41, horizon=100,
    init_states=None,
    init_cell_types=["lightgray", "yellow"], init_unique=True,
    rand_init_to_match_n_traj=False)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c")
Out[9]:
In [10]:
# Add a third goal at (7,7) with the same reward as (7,1).
nvmdp._reset_goals([[(7, 1)], [(1, 1)], [(7, 7)]], [10., 1., 10.],
                   ["blue", "orange", "purple"])
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=41, horizon=100,
    init_states=None,
    init_cell_types=["lightgray", "yellow"], init_unique=True,
    rand_init_to_match_n_traj=True)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c")
Out[10]:
In [11]:
# Four goals, labeled 0-3; plot the state space and the value function side by side.
nvmdp._reset_goals([[(7, 1)], [(1, 1)], [(7, 7)], [(4, 4)]],
                   [10., 1., 10., 5.], list(range(4)))
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=41, horizon=100,
    init_states=None,
    init_cell_types=["lightgray", "yellow"], init_unique=True,
    rand_init_to_match_n_traj=True)
fig = plt.figure(figsize=(14, 8))
nvmdp.visualize_grid(trajectories=traj_states_list,
                     show_colorbar=True, show_rewards_colorbar=True,
                     goal_marker="*c", fig=fig, subplot_str="121")
nvmdp.visualize_grid(nvmdp.get_value_grid(), trajectories=traj_states_list,
                     show_colorbar=True, show_rewards_colorbar=False,
                     goal_marker="*c", fig=fig, subplot_str="122",
                     state_space_cmap=False, title="Value")
Out[11]:
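get_value_grid() returns a plain 2-D array, so the value function can also be inspected directly with matplotlib. A sketch, assuming the array is laid out (height, width) with row 0 at the bottom:

V = nvmdp.get_value_grid()
plt.imshow(V, origin="lower", cmap="viridis")
plt.colorbar(label="V(s)")
plt.title("Optimal values")
plt.show()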
In [12]:
nvmdp.nav_cell_rewards, nvmdp.goal_cell_rewards
Out[12]:
In [13]:
# Soften the red-cell penalty from -10 to -0.01; goal rewards stay the same.
nvmdp._reset_rewards([0, 0, -0.01], [], [10., 1.0, 10., 5.])
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=41, horizon=100,
    init_states=None,
    init_cell_types=["lightgray", "yellow"], init_unique=True,
    rand_init_to_match_n_traj=True)
fig = plt.figure(figsize=(14, 8))
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=True,
                     show_rewards_colorbar=True, goal_marker="*c",
                     fig=fig, subplot_str="121")
nvmdp.visualize_grid(nvmdp.get_value_grid(), trajectories=traj_states_list,
                     show_colorbar=True, show_rewards_colorbar=False,
                     goal_marker="*c", fig=fig, subplot_str="122",
                     state_space_cmap=False, title="Value")
Out[13]:
In [14]:
W, H = 30, 30
n_obstacles = 30
goal_tile = RectangularTile(21, 21, 1, 1)
obstacle_cells = list(itertools.chain(*[o() for o in generate_obstacles(
    W, H, n_obstacles,
    obstacle_w_mu=3, obstacle_w_std=3,
    obstacle_h_mu=3, obstacle_h_std=3,
    buffer_w=2, buffer_h=1, max_search_tries=100,
    exclude_tiles=[goal_tile])]))
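Each generated tile is callable and enumerates its member cells, which is why the o() results chain into one flat list. A quick check that the excluded goal tile stayed clear:

assert (goal_tile.x, goal_tile.y) not in obstacle_cells
print("{} obstacle cells".format(len(obstacle_cells)))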
In [15]:
# The living and obstacle rewards defined here are wired into nav_cell_rewards below.
reward_living = -0.01
reward_obstacle = -1.0
nvmdp = NavigationWorldMDP(width=W, height=H,
                           nav_cell_types=['lightgray', 'red'],
                           nav_cell_rewards=[reward_living, reward_obstacle],
                           nav_cell_p_or_locs=[1., obstacle_cells],
                           goal_cell_locs=[[(goal_tile.x, goal_tile.y)]],
                           goal_cell_rewards=[1.],
                           goal_cell_types=["blue"],
                           slip_prob=0.00, step_cost=0.0, gamma=.99)
traj_states_list, traj_action_list = nvmdp.sample_trajectories(
    n_traj=8, horizon=100,
    init_states=[NavigationWorldState(2, 2)],
    init_cell_types=["lightgray"], init_unique=True,
    rand_init_to_match_n_traj=True)
nvmdp.visualize_grid(trajectories=traj_states_list, show_colorbar=False,
                     show_rewards_colorbar=True, goal_marker="*c")
Out[15]:
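Finally, the imported QLearningAgent can be trained on this same MDP with simple_rl's experiment runner. A sketch, assuming the standard run_agents_on_mdp entry point (with open_plot=False the learning curves are written to disk rather than shown):

from simple_rl.run_experiments import run_agents_on_mdp

ql = QLearningAgent(actions=nvmdp.get_actions())
run_agents_on_mdp([ql], nvmdp, instances=1, episodes=50, steps=100,
                  open_plot=False)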