In [1]:
    
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
    
In [2]:
    
class Minmax(object):
    
    '''
    This is a simple MinMax algorithm with score-Hashing and Alpha Beta pruning.
    The score evaluates a heuristic.
    '''
    
    count = 0
    DIC ={}
    
    def __init__(self):
        
        #load a DIC with some precalculated values values
        try: 
            self.DIC = self.load_obj('scores')
        except (RuntimeError, TypeError, NameError, OSError, IOError):
            self.DIC  = {}
            print '--> no scores.pkl available. Now it takes way longer to build the tree!!'
    def save_obj(self, obj, name ):
        import pickle
        '''
        save a data object
        '''
        with open(name + '.pkl', 'wb') as f:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
    def load_obj(self, name):
        '''
        loads a data object
        '''
        import pickle
        with open(name + '.pkl', 'rb') as f:
            return pickle.load(f)
    
    def get_hash_key(self, board):
        '''
        This function creates a hashkey for a board so it can be stored in the dictionary.
        '''
        b=np.ndarray.flatten(board)
        string_flat=np.char.mod('%d', b)
        key = "".join(string_flat)
        return key
    
    
    #heuristic to evaluate the next best move
    def score(self, state, depth, cur_player):
        '''
        heuristic function that uses Hashing for the scores if they are already calculated
        
        state: current gameState
        depth: current depth
        cur_player: current player
        
        we are using a simple heuristic here,
        just counting and punishing good and reward good moves with a weight.
        
        score = num of(four in row)*1000+ num of(three in row)* 100+ num of(two in row)*10
        - num of opponent(four in row)*100000- num of opponent(three in row)*100- 
        num of opponent(two in row)*10
                
        returns the score
        '''
        
        if cur_player == 1:
            oponent = 2
        else:
            oponent = 1
            
        hash_key = self.get_hash_key(state)
        
        # Score already calculated
        if hash_key in self.DIC:
            return self.DIC[hash_key]
        
        # else calculate
        else:
                
            #counting the number of good four/threes/twos in a row/column/diag
            _ , b = zip(*(move_was_winning_move(state, cur_player, e) for e in range(2,5)))
            _ , c = zip(*(move_was_winning_move(state, oponent, f) for f in range(2,5)))
            
            score = b[2]*1000+b[1]*100+b[0]*10-c[2]*100000-c[1]*100-c[0]*10
            
            #and put in DIC
            self.DIC[hash_key]  = score
            return score
    
    def listofmoves(self, gameState):
        '''
        returns a list of possible moves = column not full and orders it with middle first
        
        gameState: current gamestate
        
        return: list of possible moves
        '''
        l=[]
        for i in range(gameState.shape[1]):
            if 0 in gameState[:,i]:
                l.append(i)
        
        m=sum(l)/len(l)
        return sorted(l, key=lambda x:abs(x-m))
    
    def min_play(self, board, depth, alpha ,beta , cur_player):
        '''
        recursively building a the tree, min part of minmax
        minimzing the score, moving the oponent
        
        board: a gameState 
        depth: depth parameter of search tree
        cur_player: the current_player's number
        
        return best score
        '''
        self.count = self.count+1
        #eval current player
        if cur_player == 1:
            oponent = 2
        else:
            oponent = 1
            
        #termination
        #max depth
        if depth == 0:
            return self.score(board, depth, cur_player)
        #all full
        if not move_still_possible(board):
            return self.score(board, depth, cur_player)
        #or winner
        
        winningmove, _ = move_was_winning_move(board, cur_player, 4)
        if winningmove:
            return self.score(board, depth, cur_player)
        
        
        best_score = np.inf
        
        #get all available moves
        moves = self.listofmoves(board)
        best_move = moves[0]
        
        if len(moves) == 0:
            return self.score(board, depth, cur_player)
        
        for eachmove in moves:
            #copy board and move oponent
            boardcopy = board.copy()
            
            board_i, _ , _ = move(boardcopy, oponent, eachmove)
            
            #build recursivley max
            score = self.max_play(board_i, depth-1,alpha ,beta , cur_player)
                        
            #compare scores MIN
            #if score < best_score:
            #    best_move = eachmove
            #    best_score = score
                #print 'if', best_move, best_score
                
            #with alpha - beta
            if score < beta:
                beta = score
                #best_move = eachmove
            if beta <= alpha:
                return beta
        
        return beta#, best_move
    
    
    def max_play(self, board, depth, alpha ,beta , cur_player):
        '''
        recursively building a the tree, max part of minmax
        maximizing the score
        
        board: a gameState 
        depth: depth parameter of search tree
        cur_player: the current_player's number
        
        return best score
        '''
        self.count = self.count+1
        #eval current player
        if cur_player == 1:
            oponent = 2
        else:
            oponent = 1
            
        #termination
        #max depth
        if depth == 0:
            return self.score(board, depth, cur_player)
        #all full
        elif not move_still_possible(board):
            return self.score(board, depth, cur_player)
        #or winner
        winningmove, _ = move_was_winning_move(board, cur_player, 4)
        if winningmove:
            return self.score(board, depth, cur_player)
                
        
        best_score = -np.inf
        
        #get all available moves
        moves = self.listofmoves(board)
        best_move = moves[0]
        
        if len(moves) == 0:
            return self.score(board, depth, cur_player)
        
        for eachmove in moves:
            #copy board and move player
            boardcopy = board.copy()
            
            board_i, _ , _ = move(boardcopy, cur_player, eachmove)
            
            #build recursivley min
            score = self.min_play(board_i, depth-1,alpha ,beta , cur_player)
                        
            #compare scores MAX
            #if score > best_score:
            #    best_move = eachmove
            #    best_score = score
                #print 'if', best_move, best_score
            
            #with alpha-beta
            if score > alpha:
                alpha = score
                #best_move = eachmove
            if alpha >= beta:
                return alpha
        
        return alpha #, best_move
    
    
    def minmax(self, board, cur_player, depth=4, alpha=-np.inf, beta=np.inf):
        '''
        recursively building a the tree with alpha beta pruning
        may not be the best choice but everyone keeps saying: memory is cheap
        
        board: a gameState 
        depth: depth parameter of search tree
        cur_player: the current_player's number
        
        return best score, best move
        '''
        self.count = self.count+1
        
        #eval current player
        if cur_player == 1:
            oponent = 2
        else:
            oponent = 1
        
        best_score = -np.inf
        
        #get all available moves
        moves = self.listofmoves(board)
        best_move = moves[0]
        #for each move do
        for eachmove in moves:
            #print eachmove
            #copy board and move
            boardcopy = board.copy()               
                
            #build recursivley
            board_i, _ , _ = move(boardcopy, cur_player, eachmove)
            
            score = self.min_play(board_i, depth-1 ,alpha ,beta,  cur_player)
                        
            #compare scores
            if score > alpha:
                alpha = score
                best_move = eachmove
            if alpha >= beta:
                return alpha
        
        return alpha, best_move
    
In [3]:
    
#import numpy as np
def move_is_correct(grid,num):
    '''
    @param grid: 6x7 grid containing the current game state
    @param num: column
    returns True if move is allowed on that column
    '''
    #if 0 is in column
    if 0 in grid[:,num]:
    
        #move is allowed
        return True
    else:
        return False
def move_still_possible(S):
    '''
    @param S: 6x7 grid containing the current game state
    returns True if grid contains no 0, therefore no move possible anymore
    '''
    return not(S[S==0].size == 0)
def move(S,p,col_num):
    '''
    @param S: 6x7 grid containing the current game state
    @param p: current player
    @param col_num: column number
    
    sets the player's number on the grid and returns the grid
    '''
    
    #sanity check
    if 0 in S[:,col_num]:    
        y = np.where(S[:,col_num]==0)[0][-1]
        S[y,col_num] = p
        return S , y, col_num
    else:
        return S, None, None
        return 
def move_at_random(S):
    '''
    @param S: 6x7 grid containing the current game state
    moves at random
    '''
    return np.random.randint(0,S.shape[1])
#neat and ugly but the fastest way to search a matrix for a vector is a string find
player1=[' ','1', '1 1', '1 1 1', '1 1 1 1']
oponent=[' ','1', '2 2', '2 2 2', '2 2 2 2']
def move_was_winning_move(S, p, num):
    '''
    @param S: 6x7 grid containing the current game state
    @param p: current player
    @param num: how many occurences to count
    
    combines all the allowed formations of the grid and string_finds with 
    the currents player vector. Returns true if match.
    
    return: True or False whether move was winning move or not,
            and count of occurences
    '''
    if p == 1:
        match = player1[num]
    else:
        match = oponent[num]
    l=[]
    #for every possible diag
    for i in range(-2,4):
        l.append(np.diag(S,k = i))
        l.append(np.diag(np.fliplr(S),k=i))
    #left to right
    l.append(S)
    #top to bottom
    l.append(np.rot90(S)) 
    
    #convert to string
    stringmatrix =''.join(np.array_str(e) for e in l)
    
    #count the occurences
    counter = stringmatrix.count(match)
    
    #print stringmatrix
    
    #if four in a row
    if num == 4 and counter == 1:
        return True, counter
    return False, counter
# relate numbers (1, -1, 0) to symbols ('x', 'o', ' ')
symbols = {1:'m', 2:'r', 0:' '}
# print game state matrix using symbols
def print_game_state(S):
    B = np.copy(S).astype(object)
    for n in [1, 2, 0]:
        B[B==n] = symbols[n]
    print B
if __name__ == '__main__':
    # initialize 6x7 connectfour board
        # initialize array that counts how often a cell contributed to a win
    final = np.zeros((6,7), dtype = float)  
    outcomes = []
    mvcntrlist = []
    nodeslist = []
    
    print 'This is a Minmax vs Random Connect Four simulation: '
    difficulty = int(raw_input('Difficulty: '))
    
    for i in range(10):
        
        gameState = np.zeros((6,7), dtype=int)
        # initialize player number, move counter
        player = 1
        mvcntr = 1
        # initialize flag that indicates win
        noWinnerYet = True
        m = Minmax()
        size = len(m.DIC)
        while move_still_possible(gameState) and noWinnerYet:
            while True:
                # get player symbol
                name = symbols[player]
                #print '%s moves' % name
                # move with Minmax
                if player == 1:
                    _ , col_num = m.minmax(gameState, 1, difficulty, -np.inf, np.inf)
                
                # let player move at random
                else:
                    col_num = move_at_random(gameState)
                if move_is_correct(gameState, col_num):
                    gameState, _ , _ = move(gameState,player,col_num)
                    # print current game state
                    #print_game_state(gameState)
                    # evaluate game state
                    winningmove, _ = move_was_winning_move(gameState, player, 4)
                    if winningmove:
                        
                        # determine who won
                        winner = player
                        
                        # get the positions of the fields the winner selected
                        xp, yp = np.where(gameState == winner)
                        
                        # update "final" which counts how often a cell contributed
                        # to a win
                        final[xp,yp] +=1
                        #for i in range(0, len(xp)):
                        #    final[xp[i], yp[i]] += 1
                
                        noWinnerYet = False
                        
                        #add to list which winner
                        outcomes.append(player)
                        #how many moves
                        mvcntrlist.append(mvcntr)
                        #how many nodes
                        nodeslist.append(m.count)
                    # switch player and increase move counter
                    if player == 1:
                        player = 2
                    elif player == 2:
                        player = 1
                    mvcntr +=  1
                    break
        if noWinnerYet:
            outcomes.append(0)
            #save new DIC to increase
        if size < len(m.DIC):
            m.save_obj(m.DIC,'scores')
        
            #print 'game ended in a draw' 
        print i
    print 'length of DIC:',len(m.DIC)
    print 'number of nodes:', m.count
    print 'Finished'
    
    # normalize the count data and store it on disk
    normed_final = final / np.sum(final, dtype=np.float)
    print normed_final
    np.savetxt("normed_count.csv", normed_final, delimiter=",")
    
    
In [4]:
    
data = normed_final
fig, ax = plt.subplots()
heatmap = ax.pcolor(data)
cbar = plt.colorbar(heatmap)
ax.set_xticks([])
ax.set_yticks([])
ax.invert_yaxis()
plt.title('Heatmap after 200 games  \n')
plt.show()
    
    
In [6]:
    
his = plt.hist(outcomes,bins=3)
offset = -.18
plt.title("1st Tournament, draws and wins, depth="+str(difficulty))
#plt.xlabel("left: o wins, middle: draw, right: x wins")
plt.ylabel("# Games") 
axes = plt.gca()
axes.set_ylim([1,len(outcomes)+1]) # y axis should include all 2000 games
#axes.set_xlim([0,0])
axes.set_xticks(his[1][1:]+offset)
axes.set_xticklabels( ('Random', 'Minmax', 'draw') )
    
    Out[6]:
    
In [7]:
    
outcomes.count(0), outcomes.count(1), outcomes.count(2)
    
    Out[7]:
In [8]:
    
his = plt.hist(mvcntrlist,bins=max(mvcntrlist))
plt.title("1st Tournament, # of moves, depth="+str(difficulty))
#plt.xlabel("left: o wins, middle: draw, right: x wins")
plt.ylabel("# Games")
plt.xlabel('# of moves')
axes = plt.gca()
    
    
In [9]:
    
his = plt.hist(nodeslist,bins=max(nodeslist))
plt.title("1st Tournament, # of nodes , depth="+str(difficulty))
#plt.xlabel("left: o wins, middle: draw, right: x wins")
plt.ylabel("# Games")
plt.xlabel('# of nodes')
axes = plt.gca()
    
    
with command: python -m cProfile -s cumtime connectfour-withMinmax.py
11008374 function calls (10559783 primitive calls) in 14.654 seconds
10845027 function calls (10398214 primitive calls) in 12.555 seconds
8587527 function calls (8188635 primitive calls) in 7.127 seconds
3336653 function calls (3330229 primitive calls) in 4.693 seconds
10010685 function calls (9583101 primitive calls) in 9.372 seconds
19532712 function calls (18898637 primitive calls) in 24.994 seconds
69043885 function calls (67314936 primitive calls) in 118.056 seconds
In [14]:
    
import time
def timing(f):
    def wrap(*args):
        time1 = time.time()
        ret = f(*args)
        time2 = time.time()
        print '%s function took %0.3f ms' % (f.func_name, (time2-time1)*1000.0)
        return ret
    return wrap
    
In [15]:
    
testgame=np.array([[0,0,0,0,0,0,0],
 [0,0,0,0,0,0,0],
 [0,0,0,0,0,0,0],
 [0,0,0,0,0,0,0],
 [0,0,0,0,0,0,0],
 [0,0,0,0,0,0,0]])
    
In [21]:
    
m = Minmax()
@timing
def do_work(dif):
    return m.minmax(testgame, 1, dif, -np.inf, np.inf)
    
In [25]:
    
for i in range(1,7):
    do_work(i)
    
    
In [33]:
    
t=[0,1.235,34.304,94.698,288.690,2281.134,3482.528]
    
In [37]:
    
plt.plot(t)
plt.title('Time per Depth for building Minmax tree')
plt.xlabel('depth')
plt.ylabel('time in ms')
plt.xlim(1,7)
    
    Out[37]:
    
In [ ]: