In [0]:
import numpy as np

class state:
  def is_end_state():
    return True
  def possible_moves():
    return []

class connectX:
  def __init__(self, nrows=6, ncols=7, inrow=4, player1, player2, exp1=1, exp2=1, tag1=1, tag2=2):
    self.nrows = nrows
    self.ncols = ncols
    self.inrow = inrow
    self.state, self.winner, self.turn = self.init_game()
    self.players = {1: player1(tag1, exploration_factor=exp1), 
                    2: player2(tag2, exploration_factor=exp2)}
    self.memory = {}

  def init_game(self):
    return np.zeros((self.nrows, self.ncols)), None, 1

  def next_player(self):
    if self.turn == 1:
      self.turn = 2
    else:
      self.turn = 1

  def game_winner(self):
    for i in range(len(self.state[:, 0])-xxxx):
      for j in range(len(self.state[0, :])-):
        self.square_winner()
        if self.winner is not None:

  def square_winner(self, square):
    s = np.append([np.sum(sqare, axis=0), np.sum(square, axis=1).T],
                  [np.trace(square), np.flip(square, axis=1).trace()])
    if np.max(s) == self.inrow:
      self.winner = 1
    elif np.min(s) == -self.inrow:
      self.winner = 2
    else:
      self.winner = None
    return self.winner
  
  def play_game(self, learn=False):
    move_count = 0
    while self.winner == None:
      move = self.play_move(learn)
      self.state = self.make_state_from_move(move)
      self.game_winner()
      self.next_player()
      move_count += 1

    self.play_move(learn)
    self.next_player()
    self.play_move(learn)
    self.next_player()

    return self.winner, move_count
  
  def play_move(self, learn):
    player = self.players[self.turn]
    move = player.choose_move(self.state, self.winner, learn)
    return move

  def play_multiple_games(self, episodes, learn):
    statistics = {1: 0, 2: 0, 0: 0, 'move_count': 0}
    move_count_total = []
    for i in range(episodes):
        winner, move_count = self.play_game(learn)
        move_count_total.append(move_count)
        statistics[winner] = statistics[winner] + 1

        self.state, self.winner, self.turn = self.init_game()

    if isinstance(self.players[1], TicRLAgent):
      self.players[1].save_values()
    if isinstance(self.players[2], TicRLAgent):
      self.players[2].save_values()

    if learn is True and isinstance(self.players[1], MCTSAgent):
      self.players[1].save_tree()
    if learn is True and isinstance(self.players[2], MCTSAgent):
      self.players[2].save_tree()

    statistics['move_count'] = np.mean(move_count_total)
    return statistics


  File "<ipython-input-1-a3855cf28c37>", line 29
    for j in range(len(self.state[0, :])-):
                                         ^
SyntaxError: invalid syntax

In [0]:
episodes = 300

game = connectX(6, 7, DQNAgent, MCTSAgent, 0.8, 0.8)
statistics = game.play_multiple_games(episodes, learn=False)

In [0]:
class DQNAgent:
  def __init__(self, tag, exploration_factor=1):
    self.tag = tag
    self.exp_factor = exploration_factor

  def choose_move(self, state, winner, learn):

In [0]:
class MCTSAgent:
  def __init__(self, tag, exploration_factor=1):
    self.tag = tag
    self.exp_factor = exploration_factor
    global t
    global con_tree

  def choose_move(self, state, winner, learn):

  def expand_opp_move(self, state, learn):
    if self.exp_factor == 0 or self.expand_flag is False:
      return

    prev_state =

In [0]:
def minimax(state, max_depth, is_player_minimizer):
  if max_depth == 0 or state.is_end_state():
    return evaluation_function(state)
  if is_player_minimizer:
    value = -math.inf
    for move in state.possible_moves():
      evaluation = minimax(move, max_depth - 1, False)
      min = min(value, evaluation)
    return value
  value = math.inf
  for move in state.possible_moves():
    evaluation = minimax(move, max_depth - 1, True)
    max = max(value, evaluation)
  return value

In [0]:
def agent(observation, configuration):
    # Number of Columns on the Board.
    columns = configuration.columns
    # Number of Rows on the Board.
    rows = configuration.rows
    # Number of Checkers "in a row" needed to win.
    inarow = configuration.inarow
    # The current serialized Board (rows x columns).
    board = observation.board
    # Which player the agent is playing as (1 or 2).
    mark = observation.mark

    # Return which column to drop a checker (action).
    return 0

In [0]:
def minimax(state, max_depth, is_player_minimizer, alpha, beta):
  if max_depth == 0 or state.is_end_state():
    return evaluation_function(state)
  if is_player_minimizer:
    value = -math.inf
    for move in state.possible_moves():
      evaluation = minimax(move, max_depth - 1, False, alpha , beta)
      min = min(value, evaluation)
      beta = min(beta, evaluation)
      if beta <= alpha:
        break
      return value  
  value = math.inf
  for move in state.possible_moves():
    evaluation = minimax(move, max_depth - 1, True, alpha, beta)
    max = max(value, evaluation)
    alpha = max(alpha, evaluation)
    if beta <= alpha:
      break
    return value

In [0]: