Game Tree Search

We start with defining the abstract class Game, for turn-taking n-player games. We rely on, but do not define yet, the concept of a state of the game; we'll see later how individual games define states. For now, all we require is that a state has a state.to_move attribute, which gives the name of the player whose turn it is. ("Name" will be something like 'X' or 'O' for tic-tac-toe.)

We also define play_game, which takes a game and a dictionary of {player_name: strategy_function} pairs, and plays out the game, on each turn checking state.to_move to see whose turn it is, and then getting the strategy function for that player and applying it to the game and the state to get a move.


In [1]:
from collections import namedtuple, Counter, defaultdict
import random
import math
import functools 
cache = functools.lru_cache(10**6)

In [73]:
class Game:
    """A game is similar to a problem, but it has a terminal test instead of 
    a goal test, and a utility for each terminal state. To create a game, 
    subclass this class and implement `actions`, `result`, `is_terminal`, 
    and `utility`. You will also need to set the .initial attribute to the 
    initial state; this can be done in the constructor."""

    def actions(self, state):
        """Return a collection of the allowable moves from this state."""
        raise NotImplementedError

    def result(self, state, move):
        """Return the state that results from making a move from a state."""
        raise NotImplementedError

    def is_terminal(self, state):
        """Return True if this is a final state for the game."""
        return not self.actions(state)
    
    def utility(self, state, player):
        """Return the value of this final state to player."""
        raise NotImplementedError
        

def play_game(game, strategies: dict, verbose=False):
    """Play a turn-taking game. `strategies` is a {player_name: function} dict,
    where function(state, game) is used to get the player's move."""
    state = game.initial
    while not game.is_terminal(state):
        player = state.to_move
        move = strategies[player](game, state)
        state = game.result(state, move)
        if verbose: 
            print('Player', player, 'move:', move)
            print(state)
    return state

Minimax-Based Game Search Algorithms

We will define several game search algorithms. Each takes two inputs, the game we are playing and the current state of the game, and returns a a (value, move) pair, where value is the utility that the algorithm computes for the player whose turn it is to move, and move is the move itself.

First we define minimax_search, which exhaustively searches the game tree to find an optimal move (assuming both players play optimally), and alphabeta_search, which does the same computation, but prunes parts of the tree that could not possibly have an affect on the optimnal move.


In [66]:
def minimax_search(game, state):
    """Search game tree to determine best move; return (value, move) pair."""

    player = state.to_move

    def max_value(state):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = -infinity, None
        for a in game.actions(state):
            v2, _ = min_value(game.result(state, a))
            if v2 > v:
                v, move = v2, a
        return v, move

    def min_value(state):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = +infinity, None
        for a in game.actions(state):
            v2, _ = max_value(game.result(state, a))
            if v2 < v:
                v, move = v2, a
        return v, move

    return max_value(state)

infinity = math.inf

def alphabeta_search(game, state):
    """Search game to determine best action; use alpha-beta pruning.
    As in [Figure 5.7], this version searches all the way to the leaves."""

    player = state.to_move

    def max_value(state, alpha, beta):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = -infinity, None
        for a in game.actions(state):
            v2, _ = min_value(game.result(state, a), alpha, beta)
            if v2 > v:
                v, move = v2, a
                alpha = max(alpha, v)
            if v >= beta:
                return v, move
        return v, move

    def min_value(state, alpha, beta):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = +infinity, None
        for a in game.actions(state):
            v2, _ = max_value(game.result(state, a), alpha, beta)
            if v2 < v:
                v, move = v2, a
                beta = min(beta, v)
            if v <= alpha:
                return v, move
        return v, move

    return max_value(state, -infinity, +infinity)

A Simple Game: Tic-Tac-Toe

We have the notion of an abstract game, we have some search functions; now it is time to define a real game; a simple one, tic-tac-toe. Moves are (x, y) pairs denoting squares, where (0, 0) is the top left, and (2, 2) is the bottom right (on a board of size height=width=3).


In [67]:
class TicTacToe(Game):
    """Play TicTacToe on an `height` by `width` board, needing `k` in a row to win.
    'X' plays first against 'O'."""

    def __init__(self, height=3, width=3, k=3):
        self.k = k # k in a row
        self.squares = {(x, y) for x in range(width) for y in range(height)}
        self.initial = Board(height=height, width=width, to_move='X', utility=0)

    def actions(self, board):
        """Legal moves are any square not yet taken."""
        return self.squares - set(board)

    def result(self, board, square):
        """Place a marker for current player on square."""
        player = board.to_move
        board = board.new({square: player}, to_move=('O' if player == 'X' else 'X'))
        win = k_in_row(board, player, square, self.k)
        board.utility = (0 if not win else +1 if player == 'X' else -1)
        return board

    def utility(self, board, player):
        """Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
        return board.utility if player == 'X' else -board.utility

    def is_terminal(self, board):
        """A board is a terminal state if it is won or there are no empty squares."""
        return board.utility != 0 or len(self.squares) == len(board)

    def display(self, board): print(board)     


def k_in_row(board, player, square, k):
    """True if player has k pieces in a line through square."""
    def in_row(x, y, dx, dy): return 0 if board[x, y] != player else 1 + in_row(x + dx, y + dy, dx, dy)
    return any(in_row(*square, dx, dy) + in_row(*square, -dx, -dy) - 1 >= k
               for (dx, dy) in ((0, 1), (1, 0), (1, 1), (1, -1)))

States in tic-tac-toe (and other games) will be represented as a Board, which is a subclass of defaultdict that in general will consist of {(x, y): contents} pairs, for example {(0, 0): 'X', (1, 1): 'O'} might be the state of the board after two moves. Besides the contents of squares, a board also has some attributes:

  • .to_move to name the player whose move it is;
  • .width and .height to give the size of the board (both 3 in tic-tac-toe, but other numbers in related games);
  • possibly other attributes, as specified by keywords.

As a defaultdict, the Board class has a __missing__ method, which returns empty for squares that have no been assigned but are within the width × height boundaries, or off otherwise. The class has a __hash__ method, so instances can be stored in hash tables.


In [68]:
class Board(defaultdict):
    """A board has the player to move, a cached utility value, 
    and a dict of {(x, y): player} entries, where player is 'X' or 'O'."""
    empty = '.'
    off = '#'
    
    def __init__(self, width=8, height=8, to_move=None, **kwds):
        self.__dict__.update(width=width, height=height, to_move=to_move, **kwds)
        
    def new(self, changes: dict, **kwds) -> 'Board':
        "Given a dict of {(x, y): contents} changes, return a new Board with the changes."
        board = Board(width=self.width, height=self.height, **kwds)
        board.update(self)
        board.update(changes)
        return board

    def __missing__(self, loc):
        x, y = loc
        if 0 <= x < self.width and 0 <= y < self.height:
            return self.empty
        else:
            return self.off
            
    def __hash__(self): 
        return hash(tuple(sorted(self.items()))) + hash(self.to_move)
    
    def __repr__(self):
        def row(y): return ' '.join(self[x, y] for x in range(self.width))
        return '\n'.join(map(row, range(self.height))) +  '\n'

Players

We need an interface for players. I'll represent a player as a callable that will be passed two arguments: (game, state) and will return a move. The function player creates a player out of a search algorithm, but you can create your own players as functions, as is done with random_player below:


In [69]:
def random_player(game, state): return random.choice(list(game.actions(state)))

def player(search_algorithm):
    """A game player who uses the specified search algorithm"""
    return lambda game, state: search_algorithm(game, state)[1]

Playing a Game

We're ready to play a game. I'll set up a match between a random_player (who chooses randomly from the legal moves) and a player(alphabeta_search) (who makes the optimal alpha-beta move; practical for tic-tac-toe, but not for large games). The player(alphabeta_search) will never lose, but if random_player is lucky, it will be a tie.


In [74]:
play_game(TicTacToe(), dict(X=random_player, O=player(alphabeta_search)), verbose=True).utility


Player X move: (0, 0)
X . .
. . .
. . .

Player O move: (1, 1)
X . .
. O .
. . .

Player X move: (1, 2)
X . .
. O .
. X .

Player O move: (0, 1)
X . .
O O .
. X .

Player X move: (2, 1)
X . .
O O X
. X .

Player O move: (2, 0)
X . O
O O X
. X .

Player X move: (2, 2)
X . O
O O X
. X X

Player O move: (0, 2)
X . O
O O X
O X X

Out[74]:
-1

The alpha-beta player will never lose, but sometimes the random player can stumble into a draw. When two optimal (alpha-beta or minimax) players compete, it will always be a draw:


In [75]:
play_game(TicTacToe(), dict(X=player(alphabeta_search), O=player(minimax_search)), verbose=True).utility


Player X move: (0, 1)
. . .
X . .
. . .

Player O move: (0, 0)
O . .
X . .
. . .

Player X move: (2, 0)
O . X
X . .
. . .

Player O move: (2, 1)
O . X
X . O
. . .

Player X move: (1, 2)
O . X
X . O
. X .

Player O move: (0, 2)
O . X
X . O
O X .

Player X move: (1, 0)
O X X
X . O
O X .

Player O move: (1, 1)
O X X
X O O
O X .

Player X move: (2, 2)
O X X
X O O
O X X

Out[75]:
0

Connect Four

Connect Four is a variant of tic-tac-toe, played on a larger (7 x 6) board, and with the restriction that in any column you can only play in the lowest empty square in the column.


In [76]:
class ConnectFour(TicTacToe):
    
    def __init__(self): super().__init__(width=7, height=6, k=4)

    def actions(self, board):
        """In each column you can play only the lowest empty square in the column."""
        return {(x, y) for (x, y) in self.squares - set(board)
                if y == board.height - 1 or (x, y + 1) in board}

In [77]:
play_game(ConnectFour(), dict(X=random_player, O=random_player), verbose=True).utility


Player X move: (2, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .

Player O move: (1, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. O X . . . .

Player X move: (5, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. O X . . X .

Player O move: (4, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. O X . O X .

Player X move: (4, 4)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . X . .
. O X . O X .

Player O move: (2, 4)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . O . X . .
. O X . O X .

Player X move: (2, 3)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
. . O . X . .
. O X . O X .

Player O move: (1, 4)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
. O O . X . .
. O X . O X .

Player X move: (0, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
. O O . X . .
X O X . O X .

Player O move: (5, 4)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
. O O . X O .
X O X . O X .

Player X move: (5, 3)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . X .
. O O . X O .
X O X . O X .

Player O move: (6, 5)
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . X .
. O O . X O .
X O X . O X O

Player X move: (1, 3)
. . . . . . .
. . . . . . .
. . . . . . .
. X X . . X .
. O O . X O .
X O X . O X O

Player O move: (6, 4)
. . . . . . .
. . . . . . .
. . . . . . .
. X X . . X .
. O O . X O O
X O X . O X O

Player X move: (5, 2)
. . . . . . .
. . . . . . .
. . . . . X .
. X X . . X .
. O O . X O O
X O X . O X O

Player O move: (0, 4)
. . . . . . .
. . . . . . .
. . . . . X .
. X X . . X .
O O O . X O O
X O X . O X O

Player X move: (0, 3)
. . . . . . .
. . . . . . .
. . . . . X .
X X X . . X .
O O O . X O O
X O X . O X O

Player O move: (0, 2)
. . . . . . .
. . . . . . .
O . . . . X .
X X X . . X .
O O O . X O O
X O X . O X O

Player X move: (0, 1)
. . . . . . .
X . . . . . .
O . . . . X .
X X X . . X .
O O O . X O O
X O X . O X O

Player O move: (0, 0)
O . . . . . .
X . . . . . .
O . . . . X .
X X X . . X .
O O O . X O O
X O X . O X O

Player X move: (5, 1)
O . . . . . .
X . . . . X .
O . . . . X .
X X X . . X .
O O O . X O O
X O X . O X O

Player O move: (4, 3)
O . . . . . .
X . . . . X .
O . . . . X .
X X X . O X .
O O O . X O O
X O X . O X O

Player X move: (6, 3)
O . . . . . .
X . . . . X .
O . . . . X .
X X X . O X X
O O O . X O O
X O X . O X O

Player O move: (5, 0)
O . . . . O .
X . . . . X .
O . . . . X .
X X X . O X X
O O O . X O O
X O X . O X O

Player X move: (3, 5)
O . . . . O .
X . . . . X .
O . . . . X .
X X X . O X X
O O O . X O O
X O X X O X O

Player O move: (1, 2)
O . . . . O .
X . . . . X .
O O . . . X .
X X X . O X X
O O O . X O O
X O X X O X O

Player X move: (1, 1)
O . . . . O .
X X . . . X .
O O . . . X .
X X X . O X X
O O O . X O O
X O X X O X O

Player O move: (3, 4)
O . . . . O .
X X . . . X .
O O . . . X .
X X X . O X X
O O O O X O O
X O X X O X O

Out[77]:
-1

Transposition Tables

By treating the game tree as a tree, we can arrive at the same state through different paths, and end up duplicating effort. In state-space search, we kept a table of reached states to prevent this. For game-tree search, we can achieve the same effect by applying the @cache decorator to the min_value and max_value functions. We'll use the suffix _tt to indicate a function that uses these transisiton tables.


In [11]:
def minimax_search_tt(game, state):
    """Search game to determine best move; return (value, move) pair."""

    player = state.to_move

    @cache
    def max_value(state):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = -infinity, None
        for a in game.actions(state):
            v2, _ = min_value(game.result(state, a))
            if v2 > v:
                v, move = v2, a
        return v, move

    @cache
    def min_value(state):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = +infinity, None
        for a in game.actions(state):
            v2, _ = max_value(game.result(state, a))
            if v2 < v:
                v, move = v2, a
        return v, move

    return max_value(state)

For alpha-beta search, we can still use a cache, but it should be based just on the state, not on whatever values alpha and beta have.


In [79]:
def cache1(function):
    "Like lru_cache(None), but only considers the first argument of function."
    cache = {}
    def wrapped(x, *args):
        if x not in cache:
            cache[x] = function(x, *args)
        return cache[x]
    return wrapped

def alphabeta_search_tt(game, state):
    """Search game to determine best action; use alpha-beta pruning.
    As in [Figure 5.7], this version searches all the way to the leaves."""

    player = state.to_move

    @cache1
    def max_value(state, alpha, beta):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = -infinity, None
        for a in game.actions(state):
            v2, _ = min_value(game.result(state, a), alpha, beta)
            if v2 > v:
                v, move = v2, a
                alpha = max(alpha, v)
            if v >= beta:
                return v, move
        return v, move

    @cache1
    def min_value(state, alpha, beta):
        if game.is_terminal(state):
            return game.utility(state, player), None
        v, move = +infinity, None
        for a in game.actions(state):
            v2, _ = max_value(game.result(state, a), alpha, beta)
            if v2 < v:
                v, move = v2, a
                beta = min(beta, v)
            if v <= alpha:
                return v, move
        return v, move

    return max_value(state, -infinity, +infinity)

In [81]:
%time play_game(TicTacToe(), {'X':player(alphabeta_search_tt), 'O':player(minimax_search_tt)})


CPU times: user 593 ms, sys: 52 ms, total: 645 ms
Wall time: 655 ms
Out[81]:
O X X
X O O
O X X

In [82]:
%time play_game(TicTacToe(), {'X':player(alphabeta_search), 'O':player(minimax_search)})


CPU times: user 3.07 s, sys: 30.7 ms, total: 3.1 s
Wall time: 3.15 s
Out[82]:
O X X
X O O
O X X

Heuristic Cutoffs


In [63]:
def cutoff_depth(d):
    """A cutoff function that searches to depth d."""
    return lambda game, state, depth: depth > d

def h_alphabeta_search(game, state, cutoff=cutoff_depth(6), h=lambda s, p: 0):
    """Search game to determine best action; use alpha-beta pruning.
    As in [Figure 5.7], this version searches all the way to the leaves."""

    player = state.to_move

    @cache1
    def max_value(state, alpha, beta, depth):
        if game.is_terminal(state):
            return game.utility(state, player), None
        if cutoff(game, state, depth):
            return h(state, player), None
        v, move = -infinity, None
        for a in game.actions(state):
            v2, _ = min_value(game.result(state, a), alpha, beta, depth+1)
            if v2 > v:
                v, move = v2, a
                alpha = max(alpha, v)
            if v >= beta:
                return v, move
        return v, move

    @cache1
    def min_value(state, alpha, beta, depth):
        if game.is_terminal(state):
            return game.utility(state, player), None
        if cutoff(game, state, depth):
            return h(state, player), None
        v, move = +infinity, None
        for a in game.actions(state):
            v2, _ = max_value(game.result(state, a), alpha, beta, depth + 1)
            if v2 < v:
                v, move = v2, a
                beta = min(beta, v)
            if v <= alpha:
                return v, move
        return v, move

    return max_value(state, -infinity, +infinity, 0)

In [54]:
%time play_game(TicTacToe(), {'X':player(h_alphabeta_search), 'O':player(h_alphabeta_search)})


CPU times: user 367 ms, sys: 7.9 ms, total: 375 ms
Wall time: 375 ms
Out[54]:
O X X
X O O
O X X

In [60]:
%time play_game(ConnectFour(), {'X':player(h_alphabeta_search), 'O':random_player}, verbose=True).utility


. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . X
. . . . . X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O X
. . . . . X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O X
. . . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . . . . O X
. . . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . . . X O X
. . . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . . . X O X
. O . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. X . . X O X
. O . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O O
. X . . X O X
. O . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . X O O
. X . . X O X
. O . . X X O

. . . . . . .
. . . . . . .
. . . . . . .
. . . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . . .
. . . . . X .
. . . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . O .
. . . . . X .
. . . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . O .
. . . . . X .
. X . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . O .
. . . . . X O
. X . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . O .
. X . . . X O
. X . . X O O
. X . . X O X
. O . O X X O

. . . . . . .
. . . . . O .
. X . . . X O
. X . . X O O
. X . . X O X
. O O O X X O

. . . . . . .
. . . . . O .
. X . . . X O
. X . . X O O
. X . . X O X
X O O O X X O

. . . . . . .
. . . . . O O
. X . . . X O
. X . . X O O
. X . . X O X
X O O O X X O

. . . . . . X
. . . . . O O
. X . . . X O
. X . . X O O
. X . . X O X
X O O O X X O

. . . . . . X
. . . . . O O
. X . . . X O
. X . . X O O
O X . . X O X
X O O O X X O

. . . . . X X
. . . . . O O
. X . . . X O
. X . . X O O
O X . . X O X
X O O O X X O

. . . . . X X
. . . . . O O
. X . . . X O
. X . . X O O
O X . O X O X
X O O O X X O

. . . . . X X
. . . . . O O
. X . . . X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . . . X X
. . . . . O O
. X . . O X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . . . X X
. . . . . O O
. X . X O X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . . . X X
. . . . O O O
. X . X O X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . . . X X
. . . X O O O
. X . X O X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . . O X X
. . . X O O O
. X . X O X O
. X . X X O O
O X . O X O X
X O O O X X O

. . . X O X X
. . . X O O O
. X . X O X O
. X . X X O O
O X . O X O X
X O O O X X O

CPU times: user 8.82 s, sys: 146 ms, total: 8.96 s
Wall time: 9.19 s
Out[60]:
1

In [61]:
%time play_game(ConnectFour(), {'X':player(h_alphabeta_search), 'O':player(h_alphabeta_search)}, verbose=True).utility


. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . . . . X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . . . X X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . O .
. . O . X X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . X O .
. . O . X X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . X O .
. . O O X X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . X O .
. X O O X X .

. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. O . . X O .
. X O O X X .

. . . . . . .
. . . . . . .
. . . . . . .
. X . . . . .
. O . . X O .
. X O O X X .

. . . . . . .
. . . . . . .
. O . . . . .
. X . . . . .
. O . . X O .
. X O O X X .

. . . . . . .
. . . . . . .
. O . . . . .
. X . . . . .
. O . . X O .
X X O O X X .

. . . . . . .
. . . . . . .
. O . . . . .
. X . . . . .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . . . .
. O . . . . .
. X . . X . .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . . . .
. O . . O . .
. X . . X . .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . X . .
. O . . O . .
. X . . X . .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . X . .
. O . . O . .
. X . . X O .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . X . .
. O . . O X .
. X . . X O .
O O . . X O .
X X O O X X .

. . . . . . .
. . . . X . .
. O . . O X .
. X . . X O .
O O O . X O .
X X O O X X .

. . . . . . .
. . . . X . .
. O . . O X .
. X . . X O .
O O O X X O .
X X O O X X .

. . . . . . .
. . . . X O .
. O . . O X .
. X . . X O .
O O O X X O .
X X O O X X .

. . . . . . .
. . . . X O .
. O . . O X .
. X . X X O .
O O O X X O .
X X O O X X .

. . . . . . .
. . . . X O .
. O . O O X .
. X . X X O .
O O O X X O .
X X O O X X .

. . . . . . .
. . . X X O .
. O . O O X .
. X . X X O .
O O O X X O .
X X O O X X .

. . . O . . .
. . . X X O .
. O . O O X .
. X . X X O .
O O O X X O .
X X O O X X .

. . . O . . .
. . . X X O .
. O . O O X .
. X X X X O .
O O O X X O .
X X O O X X .

CPU times: user 12.1 s, sys: 237 ms, total: 12.4 s
Wall time: 12.9 s
Out[61]:
1

In [83]:
class CountCalls:
    """Delegate all attribute gets to the object, and count them in ._counts"""
    def __init__(self, obj):
        self._object = obj
        self._counts = Counter()
        
    def __getattr__(self, attr):
        "Delegate to the original object, after incrementing a counter."
        self._counts[attr] += 1
        return getattr(self._object, attr)
    
def report(game, searchers):
    for searcher in searchers:
        game = CountCalls(game)
        searcher(game, game.initial)
        print('Result states: {:7,d}; Terminal tests: {:7,d}; for {}'.format(
            game._counts['result'], game._counts['is_terminal'], searcher.__name__))
    
report(TicTacToe(), (alphabeta_search_tt,  alphabeta_search, h_alphabeta_search, minimax_search_tt))


Result states:   6,589; Terminal tests:   3,653; for alphabeta_search_tt
Result states:  25,703; Terminal tests:  25,704; for alphabeta_search
Result states:   4,687; Terminal tests:   2,805; for h_alphabeta_search
Result states:  16,167; Terminal tests:   5,478; for minimax_search_tt

Monte Carlo Tree Search


In [ ]:
class Node:
    def __init__(self, parent, )
def mcts(state, game, N=1000):

Heuristic Search Algorithms


In [ ]:


In [ ]:


In [ ]:


In [ ]:
t = CountCalls(TicTacToe())
    
play_game(t, dict(X=minimax_player, O=minimax_player), verbose=True)
t._counts

In [ ]:
for tactic in (three, fork, center, opposite_corner, corner, any):
    for s in squares:
        if tactic(board, s,player): return s
    for s ins quares:
        if tactic(board, s, opponent): return s

In [ ]:
def ucb(U, N, C=2**0.5, parentN=100):
    return round(U/N + C * math.sqrt(math.log(parentN)/N), 2)

{C: (ucb(60, 79, C), ucb(1, 10, C), ucb(2, 11, C)) 
    for C in (1.4, 1.5)}

In [ ]:
def ucb(U, N, parentN=100, C=2):
    return U/N + C * math.sqrt(math.log(parentN)/N)


C = 1.4 

class Node:
    def __init__(self, name, children=(), U=0, N=0, parent=None, p=0.5):
        self.__dict__.update(name=name, U=U, N=N, parent=parent, children=children, p=p)
        for c in children:
            c.parent = self
            
    def __repr__(self):
        return '{}:{}/{}={:.0%}{}'.format(self.name, self.U, self.N, self.U/self.N, self.children)
    
def select(n):
    if n.children:
        return select(max(n.children, key=ucb))
    else:
        return n
    
def back(n, amount):
    if n:
        n.N += 1
        n.U += amount
        back(n.parent, 1 - amount)
    
    
def one(root): 
    n = select(root)
    amount = int(random.uniform(0, 1) < n.p)
    back(n, amount)
        
def ucb(n): 
    return (float('inf') if n.N == 0 else
            n.U / n.N + C * math.sqrt(math.log(n.parent.N)/n.N))


tree = Node('root', [Node('a', p=.8, children=[Node('a1', p=.05), 
                                               Node('a2', p=.25,
                                                    children=[Node('a2a', p=.7), Node('a2b')])]),
                     Node('b', p=.5, children=[Node('b1', p=.6,
                                                   children=[Node('b1a', p=.3), Node('b1b')]), 
                                               Node('b2', p=.4)]),
                     Node('c', p=.1)])

for i in range(100):
    one(tree); 
for c in tree.children: print(c)
'select', select(tree), 'tree', tree

In [ ]:
us = (100, 50, 25, 10, 5, 1)
infinity = float('inf')

@lru_cache(None)
def f1(n, denom):
    return (0 if n == 0 else
            infinity if n < 0 or not denom else
            min(1 + f1(n - denom[0], denom),
                    f1(n, denom[1:])))
    
@lru_cache(None)
def f2(n, denom):
    @lru_cache(None)
    def f(n):
        return (0 if n == 0 else
                infinity if n < 0 else
                1 + min(f(n - d) for d in denom))
    return f(n)

@lru_cache(None)
def f3(n, denom):
    return (0 if n == 0 else
            infinity if n < 0 or not denom else
            min(k + f2(n - k * denom[0], denom[1:]) 
                for k in range(1 + n // denom[0])))
    

def g(n, d=us): return f1(n, d), f2(n, d), f3(n, d)
    
n = 12345
%time f1(n, us)
%time f2(n, us)
%time f3(n, us)

In [ ]: