In [0]:
static_map = {
'S': [('A', 300), ('B', 100), ('C', 200 )],
'A': [('S', 300), ('B', 100), ('E', 100 ), ('D', 100 )],
'B': [('S', 100), ('A', 100), ('C', 50 ), ('K', 200 )],
'C': [('S', 200), ('B', 50), ('M', 100 ), ('L', 200 )],
'D': [('A', 100), ('F', 50)],
'E': [('A', 100), ('F', 100), ('H', 100)],
'F': [('D', 50), ('E', 100), ('G', 200)],
'G': [('F', 200), ('O', 300)],
'H': [('E', 100), ('K', 300)],
'K': [('B', 200), ('H', 300)],
'L': [('C', 200), ('M', 50)],
'M': [('C', 100), ('L', 50), ('N', 100)],
'N': [('M', 100), ('O', 100)],
'O': [('N', 100), ('G', 300)]
}
In [0]:
state = {
'rewards': {'S': 0, 'A': 0, 'B': 1000, 'C': 1000, 'D': 0, 'E': 1000, 'F': 0, 'G': 1000, 'H': 1000, 'K': 0, 'L': 0, 'M': 1000, 'N': 0, 'O': 0},
'position': 'S',
'reward': 0,
'cost': 0,
'path': ['S']
}
In [0]:
from copy import deepcopy
import json
def as_string(state):
# reward/cost does not hurt, but is useless, path obsucres same state
new_state = {
'rewards': state['rewards'],
'position': state['position']
}
return json.dumps(new_state, sort_keys=True)
def is_goal(state):
if state['position'] != 'S': return False
for reward in state['rewards'].values():
if reward != 0: return False
return True
def expand(state):
states = []
for position, cost in static_map[state['position']]:
new_state = deepcopy(state)
new_state['position'] = position
new_state['rewards'][position] = 0
reward = state['rewards'][position]
new_state['reward'] += reward
new_state['cost'] += cost
new_state['path'].append(position)
states.append(new_state)
return states
In [0]:
def search(root, max_depth = 15):
closed = set()
open = []
open.append(root)
while open:
state = open.pop(0)
if as_string(state) in closed: continue
closed.add(as_string(state))
depth = len(state['path'])
if depth > max_depth:
print("Visited:", len(closed))
print("Reached max depth, without reaching goal")
return None
if is_goal(state):
print("Visited:", len(closed))
print("Open:", len(open))
print("Scaled reward:", (state['reward'] - state['cost']) / 6000)
print("Perfect path", state['path'])
return state
expanded = expand(state)
open += expanded
# make this Dijkstra
open.sort(key=lambda state: state['cost'])
In [43]:
%time search(state)
Out[43]:
In [0]: