Robot Run

The Game

In a given terrain, a robot (R) plays against a human player (H):

  • Both the human and the robot try to reach a goal (G) that is the same distance from each of them
  • Blocks (B) are impassable, and the two players also block each other
  • All other fields (_) are free

In [0]:
terrain = [
    ["_", "R", "_", "_"],
    ["H", "_", "B", "_"],
    ["_", "_", "B", "_"],
    ["B", "_", "G", "_"]
]

Basic Game Playing Code


In [0]:
from copy import deepcopy
from math import sqrt, pow

robot_symbol = 'R'
robot_win_symbol = '*'
goal_symbol = 'G'
human_symbol = 'H'
human_win_symbol = '#'
blank_symbol = '_'

def field_contains(state, symbol):
    for row in state:
        for field in row:
            if field == symbol:
                return True
    return False   

def is_robot_win(state):
    return field_contains(state, robot_win_symbol)  

def is_human_win(state):
    return field_contains(state, human_win_symbol)  

def as_string(state):
    s = ''
    for row in state:
        row_string = ''
        for field in row:
            row_string += field + ' '
        s += row_string + '\n'
    return s

def locate(state, what):
    for row_index, row in enumerate(state):
        for column_index, field in enumerate(row):
            if field == what:
                return (row_index, column_index)

def check_position(state, position):
    max_row = len(state) - 1
    max_column = len(state[0]) - 1
    if position[0] < 0 or position[0] > max_row or position[1] < 0 or position[1] > max_column:
        return False
    symbol = state[position[0]][position[1]]
    if symbol != blank_symbol and symbol != goal_symbol:
        return False
    return True
            
def player_moves(state, player_symbol):
    player = locate(state, player_symbol)
    left = (player[0], player[1] - 1)
    right = (player[0], player[1] + 1)
    up = (player[0] - 1, player[1])
    down = (player[0] + 1, player[1])
    valid_moves = [move for move in (left, right, down, up) if check_position(state, move)]
    return valid_moves
            
def place_player(state, player, player_symbol, player_win_symbol):
    old_player = locate(state, player_symbol)
    new_state = deepcopy(state)
    new_state[old_player[0]][old_player[1]] = blank_symbol
    if new_state[player[0]][player[1]] == goal_symbol:
        new_state[player[0]][player[1]] = player_win_symbol
    else:
        new_state[player[0]][player[1]] = player_symbol
    return new_state

def expand(state, player_symbol, player_win_symbol):
    valid_moves = player_moves(state, player_symbol)
    new_states = [(position, place_player(state, position, player_symbol, player_win_symbol)) for position in valid_moves]
    return new_states

def expand_robot(state):
    return expand(state, robot_symbol, robot_win_symbol)

def expand_human(state):
    return expand(state, human_symbol, human_win_symbol)

def distance(pos1, pos2):
    # locate returns None once a player stands on the goal (the player symbol
    # has been replaced by a win symbol), so guard against missing positions
    if pos1 and pos2:
        return sqrt(pow(pos1[0] - pos2[0], 2) + pow(pos1[1] - pos2[1], 2))
    else:
        return 0

def estimate_state(state):
    # heuristic value of a state: positive means the robot is closer to the
    # goal than the human, negative means the human is closer
    goal_position = locate(state, goal_symbol)

    robot_position = locate(state, robot_symbol)
    human_position = locate(state, human_symbol)
    
    robot_distance = distance(robot_position, goal_position)
    human_distance = distance(human_position, goal_position)

    estimated_value = human_distance - robot_distance 
    return estimated_value
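
As a quick sanity check, the helpers can be exercised directly on the terrain (a minimal sketch; it assumes the two cells above have been run):

In [0]:
print(as_string(terrain))
print('robot at', locate(terrain, robot_symbol))
print('valid robot moves', player_moves(terrain, robot_symbol))
print('estimate, positive favors the robot:', estimate_state(terrain))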

Depth-First Search as a Recursive Solution


In [0]:
# https://en.wikipedia.org/wiki/Depth-first_search
# 1  procedure DFS(G,v):
# 2      label v as discovered
# 3      for all edges from v to w in G.adjacentEdges(v) do
# 4          if vertex w is not labeled as discovered then
# 5              recursively call DFS(G,w)

def depth_first_search(state, max_depth=10, debug=False, closed_list=None, depth=0, path=None):
    # avoid mutable default arguments; both lists are only rebound, never mutated
    if closed_list is None:
        closed_list = []
    if path is None:
        path = []

    if as_string(state) in closed_list or depth > max_depth:
        return None
    
    if debug:
        print('depth', depth)
        print('closed_list', closed_list)
        print('path', path)
        print('state', as_string(state))
        
    if is_robot_win(state):
        return path
    
    # remember the current state so deeper calls do not revisit it
    closed_list = closed_list + [as_string(state)]
    
    for move, next_state in expand_robot(state):
        new_path = path + [move]
        res = depth_first_search(next_state, max_depth, debug, closed_list, depth + 1, new_path)
        if res is not None:
            return res

This is quite obviously not a shortest path, but that hardly matters as long as your robot wins. A breadth-first variant that does find a shortest path is sketched after the output below.


In [4]:
terrain


Out[4]:
[['_', 'R', '_', '_'],
 ['H', '_', 'B', '_'],
 ['_', '_', 'B', '_'],
 ['B', '_', 'G', '_']]

In [5]:
depth_first_search(terrain)


Out[5]:
[(0, 2), (0, 3), (1, 3), (2, 3), (3, 3), (3, 2)]
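
Depth-first search commits to the first winning branch it finds. For comparison, a breadth-first variant (a minimal sketch reusing expand_robot; not part of the game code above) explores states level by level and therefore returns a shortest path:

In [0]:
from collections import deque

def breadth_first_search(state):
    # states are expanded in order of path length, so the first win found is a shortest one
    seen = {as_string(state)}
    queue = deque([(state, [])])
    while queue:
        current, path = queue.popleft()
        if is_robot_win(current):
            return path
        for move, next_state in expand_robot(current):
            key = as_string(next_state)
            if key not in seen:
                seen.add(key)
                queue.append((next_state, path + [move]))

# breadth_first_search(terrain) should find a 4-move path straight down and over to the goal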

Minimax

This is not good enough any more, because now we have an adversary who gets to move as well


In [0]:
# https://en.wikipedia.org/wiki/Minimax
# robot is maximizer, human is minimizer

# use module-level constants instead of shadowing the built-ins min and max
MIN_VALUE = float('-inf')
MAX_VALUE = float('inf')

def mini_max(state, is_robot_move=True, max_depth=10, debug=False, verbose=False, depth=0):
    if debug:
        print('-----')
        print('is_robot_move', is_robot_move)
        print('depth', depth)
        print('inspecting state')
        print(as_string(state))
        
    if is_robot_win(state):
        if verbose:
            print('-----')
            print('robot win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (MAX_VALUE, None)
    
    if is_human_win(state):
        if verbose:
            print('-----')
            print('human win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (MIN_VALUE, None)
    
    if depth == max_depth:
        estimated_value = estimate_state(state)
        if verbose:
            print('max depth reached, estimation at edge {}'.format(estimated_value))
        return (estimated_value, None)
    
    if is_robot_move:
        # the robot maximizes: keep the successor state with the highest value
        best_value = MIN_VALUE
        best_state = None
        for _, next_state in expand_robot(state):
            value_for_move, _ = \
                mini_max(next_state, is_robot_move=False, max_depth=max_depth, debug=debug, verbose=verbose, depth=depth + 1)
            if value_for_move > best_value:
                best_value = value_for_move
                best_state = next_state
        return (best_value, best_state)
    else:
        # the human minimizes: keep the successor state with the lowest value
        best_value = MAX_VALUE
        best_state = None
        for _, next_state in expand_human(state):
            value_for_move, _ = \
                mini_max(next_state, is_robot_move=True, max_depth=max_depth, debug=debug, verbose=verbose, depth=depth + 1)
            if value_for_move < best_value:
                best_value = value_for_move
                best_state = next_state
        return (best_value, best_state)

In [7]:
terrain


Out[7]:
[['_', 'R', '_', '_'],
 ['H', '_', 'B', '_'],
 ['_', '_', 'B', '_'],
 ['B', '_', 'G', '_']]

It seems like whoever starts wins


In [8]:
mini_max(terrain)


Out[8]:
(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

In [9]:
mini_max(terrain, is_robot_move=False)


Out[9]:
(-inf,
 [['_', 'R', '_', '_'],
  ['_', 'H', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

In [0]:
simple_terrain = [
    ["R", "_" ],
    ["_", "G"],
    ["H", "_"]
]

In [11]:
# after 3 moves in total (2 by the robot, 1 by the human) we have a win for the robot
# mini_max(simple_terrain, max_depth = 1)
# mini_max(simple_terrain, max_depth = 2)
mini_max(simple_terrain, max_depth = 3, verbose=True)


max depth reached, estimation at edge -0.41421356237309515
-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
max depth reached, estimation at edge -0.41421356237309515
-----
robot win detected
depth 3
state [['_', '_'], ['H', '*'], ['_', '_']]
-----
-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
max depth reached, estimation at edge -0.41421356237309515
max depth reached, estimation at edge -0.41421356237309515
Out[11]:
(inf, [['_', 'R'], ['_', 'G'], ['H', '_']])
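
The recurring edge estimate of about -0.414 is simply human_distance - robot_distance = 1 - sqrt(2): at those leaves the human is one straight step and the robot one diagonal step away from the goal.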

Alpha-Beta Pruning

We are checking a lot of obviously pointless moves

  • we repeatedly check for a robot win, even though we could already know that we have won
  • if we did not, we could look at more promising moves instead (see the ordering sketch after this list)
  • this of course only pays off in larger mazes
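
One way to act on the second point is move ordering: expanding promising states first makes the cutoffs below trigger earlier. A minimal sketch (the ordered_expand_* helpers are illustrative only and not used by alpha_beta below):

In [0]:
def ordered_expand_robot(state):
    # best estimates first for the maximizer
    return sorted(expand_robot(state), key=lambda pair: estimate_state(pair[1]), reverse=True)

def ordered_expand_human(state):
    # worst estimates first for the minimizer
    return sorted(expand_human(state), key=lambda pair: estimate_state(pair[1]))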

In [0]:
# https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning
def alpha_beta(state, alpha=MIN_VALUE, beta=MAX_VALUE, is_robot_move=True, max_depth=10, depth=0, verbose=True, debug=False):
    if debug:
        print('-----')
        print('is_robot_move', is_robot_move)
        print('depth', depth)
        print('inspecting state')
        print(as_string(state))

    if is_robot_win(state):
        if verbose:
            print('-----')
            print('robot win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (MAX_VALUE, None)
    
    if is_human_win(state):
        if verbose:
            print('-----')
            print('human win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (MIN_VALUE, None)
    
    if depth == max_depth:
        estimated_value = estimate_state(state)
        if verbose:
            print('max depth reached, estimation at edge {}'.format(estimated_value))

        return (estimated_value, None)
    
    if is_robot_move:
        best_value = MIN_VALUE
        best_state = None
        for _, next_state in expand_robot(state):
            value_for_move, _ = \
                alpha_beta(next_state, is_robot_move=False, alpha=alpha, beta=beta, max_depth=max_depth, verbose=verbose, debug=debug, depth=depth + 1)
            if value_for_move > best_value:
                best_value = value_for_move
                best_state = next_state
            if best_value > alpha:
                if debug:
                    print('adjusting alpha from {} to {}'.format(alpha, best_value))
                alpha = best_value
            if beta <= alpha:
                # the minimizer above already has a better alternative, prune this branch
                if debug:
                    print('breaking, beta {} <= alpha {}'.format(beta, alpha))
                break
        return (best_value, best_state)
    else:
        best_value = MAX_VALUE
        best_state = None
        for _, next_state in expand_human(state):
            value_for_move, _ = \
                alpha_beta(next_state, is_robot_move=True, alpha=alpha, beta=beta, max_depth=max_depth, verbose=verbose, debug=debug, depth=depth + 1)
            if value_for_move < best_value:
                best_value = value_for_move
                best_state = next_state
            if best_value < beta:
                if debug:
                    print('adjusting beta from {} to {}'.format(beta, best_value))
                beta = best_value
            if beta <= alpha:
                # the maximizer above already has a better alternative, prune this branch
                if debug:
                    print('breaking, beta {} <= alpha {}'.format(beta, alpha))
                break
        return (best_value, best_state)

In [13]:
mini_max(simple_terrain, max_depth = 4, verbose=True)


max depth reached, estimation at edge 0.0
-----
human win detected
depth 4
state [['R', '_'], ['_', '#'], ['_', '_']]
-----
-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
-----
human win detected
depth 4
state [['R', '_'], ['_', '#'], ['_', '_']]
-----
max depth reached, estimation at edge 0.0
-----
robot win detected
depth 3
state [['_', '_'], ['H', '*'], ['_', '_']]
-----
-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
-----
human win detected
depth 4
state [['_', '_'], ['_', '#'], ['R', '_']]
-----
max depth reached, estimation at edge 0.0
-----
human win detected
depth 4
state [['R', '_'], ['_', '#'], ['_', '_']]
-----
Out[13]:
(inf, [['_', 'R'], ['_', 'G'], ['H', '_']])

In [14]:
alpha_beta(simple_terrain, max_depth = 4, verbose=True)


max depth reached, estimation at edge 0.0
-----
human win detected
depth 4
state [['R', '_'], ['_', '#'], ['_', '_']]
-----
-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
-----
human win detected
depth 4
state [['R', '_'], ['_', '#'], ['_', '_']]
-----
-----
robot win detected
depth 3
state [['_', '_'], ['H', '*'], ['_', '_']]
-----
Out[14]:
(inf, [['_', 'R'], ['_', 'G'], ['H', '_']])

In [15]:
%time mini_max(terrain, max_depth = 15, verbose=False)


CPU times: user 11.2 s, sys: 433 µs, total: 11.2 s
Wall time: 11.2 s
Out[15]:
(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

In [16]:
%time alpha_beta(terrain, max_depth = 15, verbose=False)


CPU times: user 249 ms, sys: 2 ms, total: 251 ms
Wall time: 252 ms
Out[16]:
(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])
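
The pruning pays off: at max_depth = 15 alpha-beta produces the same result in roughly a quarter of a second where plain minimax needs over eleven seconds, and it keeps even deeper searches affordable: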

In [17]:
%time alpha_beta(terrain, max_depth = 20, verbose=False)


CPU times: user 1.36 s, sys: 0 ns, total: 1.36 s
Wall time: 1.36 s
Out[17]:
(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

In [18]:
%time alpha_beta(terrain, max_depth = 25, verbose=False)


CPU times: user 8.68 s, sys: 2.98 ms, total: 8.68 s
Wall time: 8.68 s
Out[18]:
(inf,
 [['_', '_', '_', '_'],
  ['H', 'R', 'B', '_'],
  ['_', '_', 'B', '_'],
  ['B', '_', 'G', '_']])

In [0]:
%time alpha_beta(terrain, max_depth = 30, verbose=False)

In [0]:
simple_terrain


Out[0]:
[['R', '_'], ['_', 'G'], ['H', '_']]

In [0]:
# both mini max and alpha beta expand the same left side, but alpha beta prunes the complete right side (see mini-max-tree.jpg)
mini_max(simple_terrain, max_depth = 3, verbose=True, debug=True)


-----
is_robot_move True
depth 0
inspecting state
R _ 
_ G 
H _ 

-----
is_robot_move False
depth 1
inspecting state
_ R 
_ G 
H _ 

-----
is_robot_move True
depth 2
inspecting state
_ R 
_ G 
_ H 

-----
is_robot_move False
depth 3
inspecting state
R _ 
_ G 
_ H 

max depth reached, estimation at edge -0.41421356237309515
-----
is_robot_move False
depth 3
inspecting state
_ _ 
_ * 
_ H 

-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
-----
is_robot_move True
depth 2
inspecting state
_ R 
H G 
_ _ 

-----
is_robot_move False
depth 3
inspecting state
R _ 
H G 
_ _ 

max depth reached, estimation at edge -0.41421356237309515
-----
is_robot_move False
depth 3
inspecting state
_ _ 
H * 
_ _ 

-----
robot win detected
depth 3
state [['_', '_'], ['H', '*'], ['_', '_']]
-----
-----
is_robot_move False
depth 1
inspecting state
_ _ 
R G 
H _ 

-----
is_robot_move True
depth 2
inspecting state
_ _ 
R G 
_ H 

-----
is_robot_move False
depth 3
inspecting state
_ _ 
_ * 
_ H 

-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
-----
is_robot_move False
depth 3
inspecting state
_ _ 
_ G 
R H 

max depth reached, estimation at edge -0.41421356237309515
-----
is_robot_move False
depth 3
inspecting state
R _ 
_ G 
_ H 

max depth reached, estimation at edge -0.41421356237309515
Out[0]:
(inf, [['_', 'R'], ['_', 'G'], ['H', '_']])

In [0]:
alpha_beta(simple_terrain, max_depth = 3, verbose=True, debug=True)


-----
is_robot_move True
depth 0
inspecting state
R _ 
_ G 
H _ 

-----
is_robot_move False
depth 1
inspecting state
_ R 
_ G 
H _ 

-----
is_robot_move True
depth 2
inspecting state
_ R 
_ G 
_ H 

-----
is_robot_move False
depth 3
inspecting state
R _ 
_ G 
_ H 

max depth reached, estimation at edge -0.41421356237309515
adjusting alpha from -inf to -0.41421356237309515
-----
is_robot_move False
depth 3
inspecting state
_ _ 
_ * 
_ H 

-----
robot win detected
depth 3
state [['_', '_'], ['_', '*'], ['_', 'H']]
-----
adjusting alpha from -0.41421356237309515 to inf
breaking, beta inf <= alpha inf
-----
is_robot_move True
depth 2
inspecting state
_ R 
H G 
_ _ 

-----
is_robot_move False
depth 3
inspecting state
R _ 
H G 
_ _ 

max depth reached, estimation at edge -0.41421356237309515
adjusting alpha from -inf to -0.41421356237309515
-----
is_robot_move False
depth 3
inspecting state
_ _ 
H * 
_ _ 

-----
robot win detected
depth 3
state [['_', '_'], ['H', '*'], ['_', '_']]
-----
adjusting alpha from -0.41421356237309515 to inf
breaking, beta inf <= alpha inf
adjusting alpha from -inf to inf
breaking, beta inf <= alpha inf
Out[0]:
(inf, [['_', 'R'], ['_', 'G'], ['H', '_']])
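
Finally, the pieces can be plugged into an actual match. The play helper below is a sketch of one possible driver loop (it is not part of the notebook's original code); it alternates the two players and feeds the state chosen by alpha_beta back in until someone wins:

In [0]:
def play(state, max_depth=10, max_turns=20):
    # let robot and human take turns, each moving to the state alpha-beta picked
    is_robot_move = True
    for _ in range(max_turns):
        _, next_state = alpha_beta(state, is_robot_move=is_robot_move, max_depth=max_depth, verbose=False)
        if next_state is None:
            break
        state = next_state
        print(as_string(state))
        if is_robot_win(state) or is_human_win(state):
            break
        is_robot_move = not is_robot_move
    return state

# play(simple_terrain)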
