In [0]:
import numpy as np

class state:
    # minimal interface assumed by the minimax search further down
    def is_end_state(self):
        return True

    def possible_moves(self):
        return []
class connectX:
    def __init__(self, nrows, ncols, inrow, player1, player2,
                 exp1=1, exp2=1, tag1=1, tag2=2):
        self.nrows = nrows
        self.ncols = ncols
        self.inrow = inrow
        self.state, self.winner, self.turn = self.init_game()
        self.players = {1: player1(tag1, exploration_factor=exp1),
                        2: player2(tag2, exploration_factor=exp2)}
        self.memory = {}

    def init_game(self):
        # empty board, no winner yet, player 1 to move
        return np.zeros((self.nrows, self.ncols)), None, 1
    def next_player(self):
        if self.turn == 1:
            self.turn = 2
        else:
            self.turn = 1
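    # make_state_from_move is called from play_game below but is not defined in
    # the original snippet; this is an assumed minimal version that drops the
    # current player's mark into the chosen column.
    def make_state_from_move(self, move):
        new_state = np.copy(self.state)
        # assume +1 marks for player 1 and -1 for player 2, which is what the
        # sum-based check in square_winner expects
        mark = 1 if self.turn == 1 else -1
        empty_rows = np.where(new_state[:, move] == 0)[0]
        if len(empty_rows) > 0:
            # lowest empty cell in the column (largest row index)
            new_state[empty_rows[-1], move] = mark
        return new_state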
    def game_winner(self):
        # slide an inrow x inrow window over the whole board
        for i in range(self.nrows - self.inrow + 1):
            for j in range(self.ncols - self.inrow + 1):
                square = self.state[i:i + self.inrow, j:j + self.inrow]
                self.square_winner(square)
                if self.winner is not None:
                    return self.winner
        return self.winner

    def square_winner(self, square):
        # row sums, column sums and both diagonals of the window
        s = np.append([np.sum(square, axis=0), np.sum(square, axis=1).T],
                      [np.trace(square), np.flip(square, axis=1).trace()])
        if np.max(s) == self.inrow:
            self.winner = 1
        elif np.min(s) == -self.inrow:
            self.winner = 2
        else:
            self.winner = None
        return self.winner
    def play_game(self, learn=False):
        move_count = 0
        while self.winner is None:
            move = self.play_move(learn)
            self.state = self.make_state_from_move(move)
            self.game_winner()
            self.next_player()
            move_count += 1
        # one extra call per player after the game ends, so each agent sees the
        # final position (learning agents can use this to update on the result)
        self.play_move(learn)
        self.next_player()
        self.play_move(learn)
        self.next_player()
        return self.winner, move_count

    def play_move(self, learn):
        player = self.players[self.turn]
        move = player.choose_move(self.state, self.winner, learn)
        return move
    def play_multiple_games(self, episodes, learn):
        statistics = {1: 0, 2: 0, 0: 0, 'move_count': 0}
        move_count_total = []
        for i in range(episodes):
            winner, move_count = self.play_game(learn)
            move_count_total.append(move_count)
            statistics[winner] = statistics[winner] + 1
            self.state, self.winner, self.turn = self.init_game()

        # persist what the agents learned, depending on their type
        if isinstance(self.players[1], TicRLAgent):
            self.players[1].save_values()
        if isinstance(self.players[2], TicRLAgent):
            self.players[2].save_values()
        if learn is True and isinstance(self.players[1], MCTSAgent):
            self.players[1].save_tree()
        if learn is True and isinstance(self.players[2], MCTSAgent):
            self.players[2].save_tree()

        statistics['move_count'] = np.mean(move_count_total)
        return statistics
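The game loop above only assumes that a player object exposes a tag, an exploration factor, and a choose_move(state, winner, learn) method that returns a column index. As a minimal, hypothetical sketch of that interface (RandomAgent and its uniform policy are not part of the original notebook), a random player could look like this:
In [0]:
class RandomAgent:
    def __init__(self, tag, exploration_factor=1):
        self.tag = tag
        self.exp_factor = exploration_factor

    def choose_move(self, state, winner, learn):
        # pick uniformly among the columns whose top cell is still empty
        valid_cols = [c for c in range(state.shape[1]) if state[0, c] == 0]
        return int(np.random.choice(valid_cols))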
In [0]:
episodes = 300
game = connectX(6, 7, 4, DQNAgent, MCTSAgent, 0.8, 0.8)
statistics = game.play_multiple_games(episodes, learn=False)
In [0]:
class DQNAgent:
    def __init__(self, tag, exploration_factor=1):
        self.tag = tag
        self.exp_factor = exploration_factor

    def choose_move(self, state, winner, learn):
        # body not included in this excerpt
        raise NotImplementedError
In [0]:
class MCTSAgent:
    def __init__(self, tag, exploration_factor=1):
        self.tag = tag
        self.exp_factor = exploration_factor
        global t
        global con_tree

    def choose_move(self, state, winner, learn):
        # body not included in this excerpt
        raise NotImplementedError

    def expand_opp_move(self, state, learn):
        if self.exp_factor == 0 or self.expand_flag is False:
            return
        # prev_state = ...  (remainder of this method is truncated in the source)
In [0]:
import math

def minimax(state, max_depth, is_player_minimizer):
    # evaluation_function is assumed to be supplied elsewhere; it scores a state
    if max_depth == 0 or state.is_end_state():
        return evaluation_function(state)

    if is_player_minimizer:
        value = math.inf
        # possible_moves() is expected to return the successor states
        for move in state.possible_moves():
            evaluation = minimax(move, max_depth - 1, False)
            value = min(value, evaluation)
        return value

    value = -math.inf
    for move in state.possible_moves():
        evaluation = minimax(move, max_depth - 1, True)
        value = max(value, evaluation)
    return value
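To turn the search into a move choice, the player to move scores each successor of the current position and keeps the best one. A minimal sketch, assuming a concrete state object whose possible_moves() returns successor states and the evaluation_function referenced above (neither is implemented in this notebook):
In [0]:
def best_move(root_state, max_depth):
    # the maximizer is to move at the root, so in every child it is the
    # minimizer's turn
    best_value, best_child = -math.inf, None
    for child in root_state.possible_moves():
        value = minimax(child, max_depth - 1, is_player_minimizer=True)
        if value > best_value:
            best_value, best_child = value, child
    return best_child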
In [0]:
def agent(observation, configuration):
# Number of Columns on the Board.
columns = configuration.columns
# Number of Rows on the Board.
rows = configuration.rows
# Number of Checkers "in a row" needed to win.
inarow = configuration.inarow
# The current serialized Board (rows x columns).
board = observation.board
# Which player the agent is playing as (1 or 2).
mark = observation.mark
# Return which column to drop a checker (action).
return 0
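The stub above follows the calling convention of Kaggle's ConnectX competition. A quick way to exercise it locally, assuming the kaggle_environments package is installed, is:
In [0]:
from kaggle_environments import make

env = make("connectx", debug=True)
# pit the stub against the built-in random agent and render the episode
env.run([agent, "random"])
env.render(mode="ipython")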
In [0]:
def minimax(state, max_depth, is_player_minimizer, alpha, beta):
    if max_depth == 0 or state.is_end_state():
        return evaluation_function(state)

    if is_player_minimizer:
        value = math.inf
        for move in state.possible_moves():
            evaluation = minimax(move, max_depth - 1, False, alpha, beta)
            value = min(value, evaluation)
            beta = min(beta, evaluation)
            # the maximizer already has a line worth at least alpha,
            # so the rest of this branch cannot matter
            if beta <= alpha:
                break
        return value

    value = -math.inf
    for move in state.possible_moves():
        evaluation = minimax(move, max_depth - 1, True, alpha, beta)
        value = max(value, evaluation)
        alpha = max(alpha, evaluation)
        if beta <= alpha:
            break
    return value
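At the root, the pruned search starts with the widest possible window; it returns the same value as plain minimax while skipping branches that cannot change the result. A sketch of the root call (root_state and the depth of 6 are placeholders, not values from the notebook):
In [0]:
value = minimax(root_state, 6, is_player_minimizer=False,
                alpha=-math.inf, beta=math.inf)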
In [0]: