In [0]:
# 4x4 game board: R = robot, H = human, G = goal, B = blocked field, _ = free field.
terrain = [
    ["_", "R", "_", "_"],
    ["H", "_", "B", "_"],
    ["_", "_", "B", "_"],
    ["B", "_", "G", "_"]
]
In [0]:
from copy import deepcopy
# NOTE(review): importing `pow` from math shadows the builtin `pow` in this notebook.
from math import sqrt, pow
# Single-character markers stored in the 2D terrain lists.
robot_symbol = 'R'
robot_win_symbol = '*'   # robot standing on the goal field
goal_symbol = 'G'
human_symbol = 'H'
human_win_symbol = '#'   # human standing on the goal field
blank_symbol = '_'
def field_contains(state, symbol):
    """Return True if `symbol` occurs anywhere on the board `state`."""
    return any(cell == symbol for line in state for cell in line)
def is_robot_win(state):
    """True once the board shows the robot's win marker (robot reached the goal)."""
    return any(robot_win_symbol in row for row in state)
def is_human_win(state):
    """True once the board shows the human's win marker (human reached the goal)."""
    return any(human_win_symbol in row for row in state)
def as_string(state):
    """Render the board as text: each field followed by a space, one row per line."""
    rendered_rows = (''.join(field + ' ' for field in row) for row in state)
    return ''.join(rendered + '\n' for rendered in rendered_rows)
def locate(state, what):
    """Return the (row, column) of the first occurrence of `what`, or None if absent.

    Scans rows top-to-bottom and columns left-to-right, like reading order.
    """
    for row_index, row in enumerate(state):
        if what in row:
            return (row_index, row.index(what))
    return None
def check_position(state, position):
    """A position is a legal destination when it lies on the board and the
    target field is either blank or the goal (never a block or a player)."""
    row, column = position
    on_board = 0 <= row < len(state) and 0 <= column < len(state[0])
    if not on_board:
        return False
    return state[row][column] in (blank_symbol, goal_symbol)
def player_moves(state, player_symbol):
    """Return all board positions the given player may step to, checked in
    left, right, down, up order (same order as the original candidate list)."""
    row, column = locate(state, player_symbol)
    candidates = (
        (row, column - 1),  # left
        (row, column + 1),  # right
        (row + 1, column),  # down
        (row - 1, column),  # up
    )
    return [candidate for candidate in candidates if check_position(state, candidate)]
def place_player(state, player, player_symbol, player_win_symbol):
    """Return a new board with the player moved to position `player`.

    The old position becomes blank; landing on the goal writes the player's
    win marker instead of the normal symbol. The input board is not mutated.
    """
    new_state = deepcopy(state)
    old_row, old_column = locate(state, player_symbol)
    new_state[old_row][old_column] = blank_symbol
    target_row, target_column = player
    landed_on_goal = new_state[target_row][target_column] == goal_symbol
    new_state[target_row][target_column] = player_win_symbol if landed_on_goal else player_symbol
    return new_state
def expand(state, player_symbol, player_win_symbol):
    """Pair every legal move of the player with the board that results from it.

    Returns a list of (position, new_state) tuples.
    """
    return [
        (position, place_player(state, position, player_symbol, player_win_symbol))
        for position in player_moves(state, player_symbol)
    ]
def expand_robot(state):
    # Successor generation for the robot's turn: list of (move, new_state) pairs.
    return expand(state, robot_symbol, robot_win_symbol)
def expand_human(state):
    # Successor generation for the human's turn: list of (move, new_state) pairs.
    return expand(state, human_symbol, human_win_symbol)
def distance(pos1, pos2):
    """Euclidean distance between two (row, column) positions.

    Returns 0 when either position is falsy (e.g. a player that has already
    been replaced by its win marker and can no longer be located).
    """
    if not (pos1 and pos2):
        return 0
    delta_row = pos1[0] - pos2[0]
    delta_column = pos1[1] - pos2[1]
    return sqrt(delta_row ** 2 + delta_column ** 2)
def estimate_state(state):
    """Heuristic board value from the robot's point of view.

    Positive when the robot is closer to the goal than the human, negative
    when the human is closer; 0 when a player or the goal cannot be located.
    """
    goal = locate(state, goal_symbol)
    human_distance = distance(locate(state, human_symbol), goal)
    robot_distance = distance(locate(state, robot_symbol), goal)
    return human_distance - robot_distance
In [0]:
# https://en.wikipedia.org/wiki/Depth-first_search
# 1 procedure DFS(G,v):
# 2 label v as discovered
# 3 for all edges from v to w in G.adjacentEdges(v) do
# 4 if vertex w is not labeled as discovered then
# 5 recursively call DFS(G,w)
def depth_first_search(state, max_depth=10, debug=False, closed_list=None, depth=0, path=None):
    """Depth-limited depth-first search for a winning robot move sequence.

    Parameters:
        state: 2D board (list of lists of symbol strings).
        max_depth: maximum recursion depth before a branch is abandoned.
        debug: print search progress when True.
        closed_list: internal -- string renderings of already visited states.
        depth: internal -- current recursion depth.
        path: internal -- moves taken so far.

    Returns:
        A list of (row, column) moves leading to a robot win, or None when no
        win is reachable within `max_depth`.
    """
    # Bug fix: the original signature used mutable defaults (closed_list=[],
    # path=[]), which Python evaluates once at definition time; entries left
    # over from a previous top-level call would poison later searches.
    if closed_list is None:
        closed_list = []
    if path is None:
        path = []
    if as_string(state) in closed_list or depth > max_depth:
        return None
    if debug:
        print('depth', depth)
        print('closed_list', closed_list)
        print('path', path)
        print('state', as_string(state))
    if is_robot_win(state):
        return path
    # Rebind rather than append so sibling branches keep their own closed list.
    closed_list = closed_list + [as_string(state)]
    for move, next_state in expand_robot(state):
        res = depth_first_search(next_state, max_depth, debug, closed_list, depth + 1, path + [move])
        if res:
            return res
    return None
In [4]:
terrain
Out[4]:
In [5]:
depth_first_search(terrain)
Out[5]:
In [0]:
# https://en.wikipedia.org/wiki/Minimax
# robot is maximizer, human is minimizer
# NOTE(review): these module-level names shadow the Python builtins `min` and
# `max`. The search code below uses them as -inf/+inf sentinels, so the
# builtins are unavailable from this cell onward -- consider renaming to
# MIN_VALUE / MAX_VALUE.
min = float('-inf')
max = float('inf')
def mini_max(state, is_robot_move=True, max_depth=10, debug=False, verbose=False, depth = 0):
    """Plain minimax search: the robot maximizes, the human minimizes.

    Returns a (value, state) pair: value is +inf/-inf for decided games, a
    heuristic estimate at the depth limit; state is the successor board chosen
    for the player to move (None at leaf nodes).
    """
    if debug:
        print('-----')
        print('is_robot_move', is_robot_move)
        print('depth', depth)
        print('inspecting state')
        print(as_string(state))
    if is_robot_win(state):
        if verbose:
            print('-----')
            print('robot win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (max, None)
    if is_human_win(state):
        if verbose:
            print('-----')
            print('human win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (min, None)
    if depth == max_depth:
        estimated_value = estimate_state(state)
        if verbose:
            print('max depth reached, estimation at edge {}'.format(estimated_value))
        return (estimated_value, None)
    # Both players share the same loop; only the expansion function, the
    # starting sentinel and the direction of the strict comparison differ.
    if is_robot_move:
        successors = expand_robot(state)
        chosen_value = min
        better = lambda candidate, best: candidate > best
    else:
        successors = expand_human(state)
        chosen_value = max
        better = lambda candidate, best: candidate < best
    chosen_state = None
    for _move, next_state in successors:
        candidate_value, _ = mini_max(
            next_state, is_robot_move=not is_robot_move, max_depth=max_depth,
            debug=debug, verbose=verbose, depth=depth + 1)
        # Strict comparison: ties keep the first successor found, as before.
        if better(candidate_value, chosen_value):
            chosen_value = candidate_value
            chosen_state = next_state
    return (chosen_value, chosen_state)
In [7]:
terrain
Out[7]:
In [8]:
mini_max(terrain)
Out[8]:
In [9]:
mini_max(terrain, is_robot_move=False)
Out[9]:
In [0]:
simple_terrain = [
["R", "_" ],
["_", "G"],
["H", "_"]
]
In [11]:
# after 3 moves in total (2 robot, 1 human) we have a win for robot
# mini_max(simple_terrain, max_depth = 1)
# mini_max(simple_terrain, max_depth = 2)
mini_max(simple_terrain, max_depth = 3, verbose=True)
Out[11]:
In [0]:
# https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning
def alpha_beta(state, alpha = min, beta = max, is_robot_move=True, max_depth=10, depth = 0, verbose=True, debug=False):
    """Minimax with alpha-beta pruning; the robot maximizes, the human minimizes.

    alpha/beta default to the module-level `min`/`max` sentinels (-inf/+inf).
    Returns a (value, state) pair like mini_max: state is the chosen successor
    board (None at leaf nodes).
    NOTE(review): `verbose` defaults to True here but False in mini_max --
    presumably unintentional; confirm before relying on quiet output.
    """
    if debug:
        print('-----')
        print('is_robot_move', is_robot_move)
        print('depth', depth)
        print('inspecting state')
        print(as_string(state))
    # Terminal: robot already on the goal -> best possible value for maximizer.
    if is_robot_win(state):
        if verbose:
            print('-----')
            print('robot win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (max, None)
    # Terminal: human already on the goal -> worst possible value for maximizer.
    if is_human_win(state):
        if verbose:
            print('-----')
            print('human win detected')
            print('depth', depth)
            print('state', state)
            print('-----')
        return (min, None)
    # Depth limit reached: fall back to the heuristic estimate.
    if depth == max_depth:
        estimated_value = estimate_state(state)
        if verbose:
            print('max depth reached, estimation at edge {}'.format(estimated_value))
        return (estimated_value, None)
    if is_robot_move:
        # Maximizing player: raise alpha as better values are found.
        best_value = min
        best_move = None
        for move, next_state in expand_robot(state):
            value_for_move, _ =\
                alpha_beta(next_state, is_robot_move=False, alpha = alpha, beta = beta, max_depth=max_depth, verbose=verbose, debug=debug, depth = depth + 1)
            if value_for_move > best_value:
                best_value = value_for_move
                # NOTE(review): stores the successor state, not the move tuple,
                # despite the name -- matches mini_max's behaviour.
                best_move = next_state
            if best_value > alpha:
                if debug:
                    print('adjusting alpha from {} to {}'.format(alpha, best_value))
                alpha = best_value
            # Prune: the minimizer above will never allow this branch.
            if beta <= alpha:
                if debug:
                    print('breaking, beta {} <= alpha {}'.format(beta, alpha))
                break
        return (best_value, best_move)
    else:
        # Minimizing player: lower beta as better (smaller) values are found.
        best_value = max
        best_move = None
        for move, next_state in expand_human(state):
            value_for_move, _, =\
                alpha_beta(next_state, is_robot_move=True, alpha = alpha, beta = beta, max_depth=max_depth, verbose=verbose, debug=debug, depth = depth + 1)
            if value_for_move < best_value:
                best_value = value_for_move
                best_move = next_state
            if best_value < beta:
                if debug:
                    print('adjusting beta from {} to {}'.format(beta, best_value))
                beta = best_value
            # Prune: the maximizer above will never allow this branch.
            if beta <= alpha:
                if debug:
                    print('breaking, beta {} <= alpha {}'.format(beta, alpha))
                break
        return (best_value, best_move)
In [13]:
mini_max(simple_terrain, max_depth = 4, verbose=True)
Out[13]:
In [14]:
alpha_beta(simple_terrain, max_depth = 4, verbose=True)
Out[14]:
In [15]:
%time mini_max(terrain, max_depth = 15, verbose=False)
Out[15]:
In [16]:
%time alpha_beta(terrain, max_depth = 15, verbose=False)
Out[16]:
In [17]:
%time alpha_beta(terrain, max_depth = 20, verbose=False)
Out[17]:
In [18]:
%time alpha_beta(terrain, max_depth = 25, verbose=False)
Out[18]:
In [0]:
%time alpha_beta(terrain, max_depth = 30, verbose=False)
In [0]:
simple_terrain
Out[0]:
In [0]:
# both mini-max and alpha-beta expand the same left side, but alpha-beta prunes the complete right side (see mini-max-tree.jpg)
mini_max(simple_terrain, max_depth = 3, verbose=True, debug=True)
Out[0]:
In [0]:
alpha_beta(simple_terrain, max_depth = 3, verbose=True, debug=True)
Out[0]:
In [0]: