In [0]:
static_map = {
            'S': [('A', 300), ('B', 100), ('C', 200 )],
            'A': [('S', 300), ('B', 100), ('E', 100 ), ('D', 100 )],
            'B': [('S', 100), ('A', 100), ('C', 50 ), ('K', 200 )],
            'C': [('S', 200), ('B', 50), ('M', 100 ), ('L', 200 )],
            'D': [('A', 100), ('F', 50)],
            'E': [('A', 100), ('F', 100), ('H', 100)],
            'F': [('D', 50), ('E', 100), ('G', 200)],
            'G': [('F', 200), ('O', 300)],
            'H': [('E', 100), ('K', 300)],
            'K': [('B', 200), ('H', 300)],
            'L': [('C', 200), ('M', 50)],
            'M': [('C', 100), ('L', 50), ('N', 100)],
            'N': [('M', 100), ('O', 100)],
            'O': [('N', 100), ('G', 300)]
        }

In [0]:
state = {
    'rewards': {'S': 0, 'A': 0, 'B': 1000, 'C': 1000, 'D': 0, 'E': 1000, 'F': 0, 'G': 1000, 'H': 1000, 'K': 0, 'L': 0, 'M': 1000, 'N': 0, 'O': 0},
    'position': 'S',
    'reward': 0,
    'cost': 0,
    'path': ['S']
}

In [0]:
from copy import deepcopy
import json

def as_string(state):
  # reward/cost does not hurt, but is useless, path obsucres same state
  new_state = {
      'rewards': state['rewards'],
      'position': state['position']
  }
  return json.dumps(new_state, sort_keys=True)
  
def is_goal(state):
  if state['position'] != 'S': return False
  for reward in state['rewards'].values():
    if reward != 0: return False
  return True
    

def expand(state):
  states = []
  for position, cost in static_map[state['position']]:
    new_state = deepcopy(state)
    new_state['position'] = position
    new_state['rewards'][position] = 0
    reward = state['rewards'][position]
    new_state['reward'] += reward
    new_state['cost'] += cost
    new_state['path'].append(position)
    states.append(new_state)
  return states

In [0]:
def search(root, max_depth = 15):
    closed = set()

    open = []
    open.append(root)
    
    while open:
        state = open.pop(0)
        if as_string(state) in closed: continue  

        closed.add(as_string(state))
        
        depth = len(state['path'])
        if depth > max_depth:
          print("Visited:", len(closed))
          print("Reached max depth, without reaching goal")
          return None
        
        if is_goal(state):
            print("Visited:", len(closed))
            print("Open:", len(open))
            print("Scaled reward:", (state['reward'] - state['cost']) / 6000)            
            print("Perfect path", state['path'])
            return state

        expanded = expand(state)
        open += expanded
        # make this Dijkstra
        open.sort(key=lambda state: state['cost'])

In [43]:
%time search(state)


Visited: 381
Open: 115
Scaled reward: 0.7416666666666667
Perfect path ['S', 'B', 'C', 'M', 'N', 'O', 'G', 'F', 'E', 'H', 'E', 'A', 'B', 'S']
CPU times: user 80.5 ms, sys: 2.52 ms, total: 83.1 ms
Wall time: 85.6 ms
Out[43]:
{'cost': 1550,
 'path': ['S',
  'B',
  'C',
  'M',
  'N',
  'O',
  'G',
  'F',
  'E',
  'H',
  'E',
  'A',
  'B',
  'S'],
 'position': 'S',
 'reward': 6000,
 'rewards': {'A': 0,
  'B': 0,
  'C': 0,
  'D': 0,
  'E': 0,
  'F': 0,
  'G': 0,
  'H': 0,
  'K': 0,
  'L': 0,
  'M': 0,
  'N': 0,
  'O': 0,
  'S': 0}}

In [0]: