In [1]:
%matplotlib inline
import axelrod as axl
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import sympy as sym
import itertools
import csv
import os
import glob
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pandas.util.testing import assert_frame_equal
import math
import imp
# Load the project's helper modules from ../src by file path.
players = imp.load_source('players', '../src/players.py')
# NOTE(review): generate_cache.py is registered under the module name
# 'players' (the same name as the line above), not 'generate_cache'. This
# looks like a copy/paste slip; confirm before changing it, because later
# cells run `from players import ...`.
generate_cache = imp.load_source('players', '../src/generate_cache.py')
theoretic = imp.load_source('theoretic', '../src/theoretic.py')
abbreviations = imp.load_source('abbreviations', '../src/abbreviations.py')
# Pin the exact library versions; the notebook stops here on any mismatch.
assert axl.__version__ == '2.9.0'
assert pd.__version__ == '0.19.2'
assert matplotlib.__version__ == '2.0.0'
assert sns.__version__ == '0.7.1'
assert np.__version__ == '1.12.1'
assert sym.__version__ == '1.0'
Here are all the strategies used in this experiment:
In [2]:
from abbreviations import abbreviations
def abbreviate(player_name, abbreviations=abbreviations):
    """
    Look up the short display name of a player.

    `player_name` is either a name string or an `axl.Player` instance (which
    is converted with `str` first). A name with no entry in `abbreviations`
    is returned unchanged.
    """
    key = str(player_name) if isinstance(player_name, axl.Player) else player_name
    return abbreviations.get(key, key)
# Sanity checks: known names map to their abbreviation, whether given as a
# string or as a Player instance.
assert abbreviate("Tit For Tat") == "TfT"
assert abbreviate("Random: 0.5") == "Random"
assert abbreviate(axl.TitForTat()) == "TfT"
In [3]:
from players import selected_players
# Keep only the strategies that do not make use of the match length and whose
# abbreviation is not marked "Incorrect".
players_of_interest = {
    player
    for player in selected_players()
    if "length" not in player.classifier["makes_use_of"]
    and "Incorrect" not in abbreviate(player)
}
assert len(players_of_interest) == 164
These players have been omitted from the analysis (even though their data was collected). This is because they either use the match length or were entered into the original data sweep by mistake (the FSM strategies do not use the correct initial state).
In [4]:
# Names of the selected players that are excluded from the analysis.
{str(p) for p in selected_players()} - {str(p) for p in players_of_interest}
Out[4]:
In [5]:
# Map each player name to its list of bibliography keys.
# newline="" is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.
with open("../data/reference_keys.csv", "r", newline="") as f:
    reader = csv.reader(f)
    # NOTE(review): eval() runs whatever expression is stored in the second
    # column. That is only acceptable while this CSV is a trusted, project
    # generated file; consider ast.literal_eval if the keys are plain lists.
    reference_keys = {player: eval(keys) for player, keys in reader}
assert reference_keys['ALLCorALLD'] == ['axelrodproject']
assert reference_keys['Cooperator'] == ['Axelrod1984', 'Mittal2009', 'Press2012']
In [6]:
# Write one LaTeX \item per strategy: name, abbreviation (when it differs from
# the player's name), stochastic/deterministic, memory depth and citations.
# Backslashes are written as "\\" so that no string relies on Python's
# deprecated handling of unknown escape sequences; the text written to the
# file is unchanged.
with open("../tex/list_of_players.tex", "w") as f:
    for player in sorted(players_of_interest, key=str):
        latex_name = "{}".format(player).replace("_", "\\_")
        f.write("\\item {}".format(latex_name))
        abbreviation = abbreviate(player)
        if abbreviation != player.name:
            f.write("(\\textbf{{{}}})".format(abbreviation))
        if player.classifier["stochastic"]:
            f.write(" - \\textit{Stochastic}")
        else:
            f.write(" - \\textit{Deterministic}")
        try:
            mem = int(player.classifier["memory_depth"])
        except OverflowError:
            # int() cannot convert an infinite float: typeset it as infinity.
            mem = "\\(\\infty\\)"
        if player.name == "Grudger":
            mem = 1  # Overwrite incorrect classification
        f.write(" - \\textit{{Memory depth}}: {}".format(mem))
        try:
            # The KeyError (player without references) is raised while the
            # argument is built, i.e. before anything is written.
            f.write(". \\cite{{{}}}\n".format(", ".join(sorted(reference_keys[str(player)]))))
        except KeyError:
            f.write(".\n")
Here is some summary information about the strategies:
In [7]:
def clean_mem(n):
    """
    Convert a memory depth to an int, using -1 when `int` overflows.

    `int(n)` raises OverflowError for an infinite float; that case is mapped
    to the sentinel -1. Any other conversion error propagates.
    """
    try:
        depth = int(n)
    except OverflowError:
        depth = -1
    return depth
# One row per analysed strategy: abbreviated name, stochastic flag and
# memory depth (-1 when clean_mem hits an overflow).
player_rows = []
for player in players_of_interest:
    classifier = player.classifier
    player_rows.append([abbreviate(player),
                        classifier["stochastic"],
                        clean_mem(classifier['memory_depth'])])
player_info = pd.DataFrame(player_rows,
                           columns=["Player", "Stochastic", "Memory Depth"])
In [8]:
# Count the stochastic and the deterministic strategies.
temp_df = pd.DataFrame(player_info.groupby("Stochastic")["Player"].count()).reset_index().rename(columns={"Player": "Count"})
# The loop variable is called `stochastic` rather than `bool` so that the
# builtin `bool` is not shadowed for the remainder of the notebook.
for stochastic in [True, False]:
    filename = "../tex/num_stochastic.tex" if stochastic else "../tex/num_deterministic.tex"
    with open(filename, "w") as f:
        num = temp_df[temp_df["Stochastic"] == stochastic]["Count"].iloc[0]
        print(num, stochastic)
        f.write(str(num))
# Number of strategies used in this analysis.
with open("../tex/num_strategies.tex", "w") as f:
    num = len(players_of_interest)
    f.write(str(num))
    print(num)
# Number of strategies listed in `axl.strategies`.
with open("../tex/num_strategies_axelrod.tex", "w") as f:
    num = len(axl.strategies)
    f.write(str(num))
    print(num)
In [9]:
# One-row table ("Count") with the number of strategies per memory depth.
depth_counts = player_info.groupby("Memory Depth")["Player"].count()
mem_df = pd.DataFrame(depth_counts).transpose().rename(index={"Player": "Count"})
# Move the first column to the end (groupby sorts the depths, so the first
# column is the smallest value, e.g. the -1 sentinel written by clean_mem).
cols = mem_df.columns.tolist()
mem_df = mem_df[cols[1:] + [cols[0]]]
mem_df
Out[9]:
In [10]:
def clean_latex(string):
    """
    Replace some special characters in LaTeX text.

    Undoes a number of escapes (`\\$`, `\\_`, `\\textasciicircum`,
    `textbackslashpi`, `textbackslashphi`), re-escapes the underscores in
    "2_2_2" and "1_1_1", and wraps "TF1", "TF2" and "TF3" in `\\textbf{}`.

    Parameters
    ----------
    string : str
        The LaTeX source to clean.

    Returns
    -------
    str
        The cleaned LaTeX source.
    """
    string = string.replace("textbackslashpi", "pi")
    string = string.replace("textbackslashphi", "phi")
    # Two passes: the second one catches a `\$` that only comes into being
    # once the first pass has consumed part of a `\\$` sequence.
    string = string.replace("\\$", "$")
    string = string.replace("\\$", "$")
    string = string.replace("\\textasciicircum", "^")
    string = string.replace("\\_", "_")
    # These two names need their underscores escaped again.
    string = string.replace("2_2_2", "2\\_2\\_2")
    string = string.replace("1_1_1", "1\\_1\\_1")
    for i in range(1, 4):
        string = string.replace("TF{}".format(i), "\\textbf{{TF{}}}".format(i))
    return string
# Write the memory depth table, typesetting the -1 sentinel as infinity.
# "\\(\\infty\\)" spells the backslashes out explicitly (same text as before,
# without relying on deprecated unknown escape sequences).
with open("../tex/memory_depth_count.tex", "w") as f:
    string = clean_latex(mem_df.to_latex()).replace("-1", "\\(\\infty\\)")
    f.write(string)
In [11]:
# Load the cached match outcomes (the file has no header row), abbreviate the
# player names and keep only the matches between analysed strategies.
outcome_columns = ["Player 1", "Player 2", "Score 1", "Score 2", "Iteration"]
cached_outcomes = pd.read_csv("../data/outcomes.csv", header=None,
                              names=outcome_columns)
for column in ("Player 1", "Player 2"):
    cached_outcomes[column] = cached_outcomes[column].apply(abbreviate)
analysed_names = player_info["Player"]
cached_outcomes = cached_outcomes[cached_outcomes["Player 1"].isin(analysed_names) &
                                  cached_outcomes["Player 2"].isin(analysed_names)]
assert len(cached_outcomes.index) == 1169001
In [12]:
# Preview the first rows of the filtered outcomes.
cached_outcomes.head()
Out[12]:
In [13]:
# Lookup table: abbreviated player name -> value of its "Stochastic" column.
is_stochastic = dict(zip(player_info["Player"], player_info["Stochastic"]))
In [14]:
# Number of cached outcomes for every (Player 1, Player 2) pair.
pair_counts = cached_outcomes.groupby(["Player 1", "Player 2"]).count().reset_index()
temp_df = (pair_counts
           .rename(columns={"Score 1": "Outcome count"})
           .drop(["Score 2", "Iteration"], axis=1))
In [15]:
# A pair is flagged as stochastic when at least one of its two players is
# stochastic AND more than one outcome was recorded for the pair.
temp_df["Stochastic"] = ((temp_df["Player 1"].map(lambda p: is_stochastic[p]) |
temp_df["Player 2"].map(lambda p: is_stochastic[p])) &
(temp_df["Outcome count"] > 1))
In [16]:
# Preview the per-pair outcome counts and stochastic flag.
temp_df.head()
Out[16]:
Here we create a number of plots comparing the theoretic fixation probability with the observed fixation probability for a number of initial starting populations.
In [17]:
# Load the fixation validation data; index_col=False keeps pandas from using
# the first column as the index.
validation = pd.read_csv("../data/fixation_validation.csv", index_col=False)
In [18]:
# Preview the last rows of the validation data.
validation.tail()
Out[18]:
In [19]:
# For every validated pair of players: plot the theoretic fixation probability
# (lines) against the simulated one (markers) as a function of N, for the three
# starting populations i = 1, i = N / 2 and i = N - 1, and save it as a PDF.
markers = itertools.cycle(('X', '+', 'o'))
marker_size = 50
fontsize=16
for names, df in validation.groupby(["Player 1", "Player 2"]):
    # Rebind instead of sorting in place: the group handed out by groupby is
    # a slice of `validation`, and mutating it triggers SettingWithCopy
    # warnings.
    df = df.sort_values("N")
    repetitions = df["Repetitions"].iloc[0]
    assert all(df["Repetitions"] == repetitions)
    # Get names instead of repr (to drop some parameters)
    names = [abbreviate(name) for name in names]
    title = "{} and {} over {} repetitions".format(*names, repetitions)
    filename = "{}_v_{}".format(*names)
    for substr in [": ", ".", ":", " "]:
        filename = filename.replace(substr, "_")
    plt.figure()
    labels = ["1", "N / 2", "N - 1"]
    index_sets = [df["i"] == 1, df["i"] == df["N"] / 2, df["i"] == df["N"] - 1]
    for label, index in zip(labels, index_sets):
        d = df[index]
        # Empty label keeps the theoretic lines out of the legend.
        plt.plot(d["N"], d["Theoretic"], label="")
        plt.scatter(d["N"], d["Simulated"], marker=next(markers),
                    label="$x_{{{}}}$".format(label), s=marker_size)
    # Ticks are taken from the last subset plotted (i = N - 1).
    plt.xticks(d["N"])
    plt.title(title, fontsize=fontsize)
    plt.ylim(0, 1.1)
    plt.ylabel("{} fixation probability".format(names[0]))
    plt.xlabel("$N$")
    plt.legend(fontsize=15)
    plt.savefig("../img/{}.pdf".format(filename))
In [20]:
# Load the main results, abbreviate the names and keep only the rows in which
# both the player and the opponent are analysed strategies.
main = pd.read_csv("../data/main.csv")
for column in ("player", "opponent"):
    main[column] = main[column].apply(abbreviate)
main = main[main["player"].isin(player_info["Player"]) &
            main["opponent"].isin(player_info["Player"])]
# Test known values
expected = pd.DataFrame([["Cooperator", "Defector", 10, 0, 0.019, 0.500],
                         ["TfT", "Defector", 10, 0.262, 0.915, 0.999]],
                        columns=main.columns)
known_rows = main[main["player"].isin(["TfT", "Cooperator"]) &
                  (main["opponent"] == "Defector") &
                  (main["N"] == 10)]
assert_frame_equal(expected, known_rows.round(3).reset_index(drop=True))
In [21]:
# Check number of completed matches: 164 strategies implies 13366 pairs.
# Each pair is considered twice (invader/resistor)
expected_rows = 13366 * 2
for N, df in main.groupby("N"):
    # Series.count() is the number of non-missing entries.
    num_p_1 = df["$p_1$"].count()
    num_p_n_over_2 = df["$p_{N/2}$"].count()
    num_p_n_minus_1 = df["$p_{N-1}$"].count()
    if N % 2 == 0:
        assert num_p_1 == num_p_n_over_2 == num_p_n_minus_1 == expected_rows
    else:
        # Odd N: no value is expected in the N/2 column.
        assert num_p_1 == num_p_n_minus_1 == expected_rows
        assert num_p_n_over_2 == 0
In [22]:
# Preview the first rows of the main results.
main.head()
Out[22]:
In [23]:
# Every row (all N) in which TfT is the player and Defector the opponent.
main[(main["player"] == "TfT") & (main["opponent"] == "Defector")]
Out[23]:
This shows:
- $p_1$: 1 player in a population of N - 1 opponents;
- $p_{N/2}$: N/2 players in a population of N/2 opponents;
- $p_{N-1}$: N - 1 players in a population of 1 opponent.
In [24]:
# File-name suffixes for the output files. Later cells zip this list
# positionally with value lists ordered '$p_1$', '$p_{N-1}$', '$p_{N/2}$'.
plot_file_labels = ["invade", "resist", "coexist"]
In [25]:
# Preview of the main results (same call as the earlier `main.head()` cell).
main.head()
Out[25]:
In [26]:
# For every population size N and every quantity of interest: draw one
# horizontal box per player (ordered by mean value), overlay the line of means
# and the neutral fixation probability, and save the figure as a PDF.
plt.rcParams['figure.figsize'] = 8, 30
fontsize = 20
for N in range(2, 14 + 1):
    # Columns containing any missing value for this N are dropped.
    temp_df = main[main["N"] == N].dropna(axis=1)
    values = ['$p_1$'] if N == 2 else ['$p_1$', '$p_{N-1}$']
    for value, plot_file_label in zip(values, plot_file_labels):
        fig, ax = plt.subplots()

        labels, data = [], []
        for player, df in temp_df.groupby("player"):
            labels.append(player)
            data.append(list(df[value].dropna()))
        mean_fixation = [np.mean(ele) for ele in data]

        # Order the players (and their samples) by increasing mean.
        ranked = sorted(zip(labels, data, mean_fixation),
                        key=lambda triple: triple[2])
        labels, data, _ = zip(*ranked)

        # Plot the mean scores
        ax.boxplot(data, vert=False)
        xs = sorted(mean_fixation)
        ys = range(1, len(xs) + 1)
        ax.plot(xs, ys, linewidth=3)

        # Plot the neutral fixation
        # NOTE(review): `values` above never contains '$p_{N/2}$', so the
        # middle branch is not reached in this cell.
        if value == '$p_1$':
            neutral, neutral_label = 1 / N, "$\\frac{1}{N}$"
        elif value == '$p_{N/2}$':
            neutral, neutral_label = 1 / 2, "$\\frac{1}{2}$"
        else:
            neutral, neutral_label = (N - 1) / N, "$\\frac{N - 1}{N}$"
        ax.axvline(neutral, linestyle="dashed", linewidth=3,
                   label=neutral_label, color="black")

        ax.set_yticklabels(labels)
        ax.set_title("$N={}$ {}".format(N, value))
        ax.set_xlim((0, 1))
        ax.grid(b=False)
        ax.legend(fontsize=fontsize)
        fig.tight_layout()
        fig.savefig("../img/boxplot_{}_{}.pdf".format(N, plot_file_label))
In [27]:
def average_rank_df(col):
    """
    Rank the players by their mean value of `col`, separately for every N.

    Reads the notebook-level `main` frame. Returns a frame with a "player"
    column followed by one column per N; each N column holds the player's
    rank for that N, where rank 1 is the highest mean.
    """
    means = main.groupby(["player", "N"])[col].mean().reset_index()
    mean_by_N = means.pivot(index="player", columns="N", values=col)
    return mean_by_N.rank(ascending=False).reset_index()
In [28]:
def rank_summary_df(value, N=2, number=5):
    """
    Summarise the `number` best ranked players for `value` at population size N.

    Returns a frame with the columns "Player" and "Mean <value>", sorted by
    decreasing mean. Reads the notebook-level `main` frame.
    """
    best_players = average_rank_df(value).sort_values(N).head(number)["player"]
    selection = main[main["player"].isin(best_players) & (main["N"] == N)]
    mean_column = "Mean {}".format(value)
    rows = [[player, np.mean(group[value].dropna())]
            for player, group in selection.groupby("player")]
    summary = pd.DataFrame(rows, columns=["Player", mean_column])
    return summary.sort_values(mean_column, ascending=False)
In [29]:
# Write, for every N and every quantity available for that N, a LaTeX table of
# the top 10% of players.
#
# The original cell wrapped this loop in a copy/pasted duplicate of itself, so
# every file was rewritten once per outer value, and it assigned `temp_df`
# twice without using it. A single loop writes exactly the same files.
number = int(len(players_of_interest) / 10)  # Consider top 10%
for N in range(2, 14 + 1):
    if N == 2:
        values = ['$p_1$']
    elif N % 2 == 0:
        values = ['$p_1$', '$p_{N-1}$', '$p_{N/2}$']
    else:
        values = ['$p_1$', '$p_{N-1}$']
    for value, plot_file_label in zip(values, plot_file_labels):
        temp_df = rank_summary_df(value=value, N=N, number=number).round(4)
        temp_df.index = range(1, number + 1)
        with open("../tex/summary_top_{}_{}.tex".format(N, plot_file_label), "w") as f:
            # "-1" is replaced by an infinity symbol, then the "{N-1}" of the
            # column header (mangled by that replacement) is restored.
            # Backslashes are doubled explicitly; the strings are unchanged.
            latex = temp_df.to_latex().replace("-1", "\\(\\infty\\)")
            latex = latex.replace("\\{N\\(\\infty\\)\\}", "{N-1}")
            f.write(clean_latex(latex))
In [30]:
def plot_ranks(value):
    """
    Plot how each player's average rank for `value` changes with N.

    The left axis is labelled with the players ordered by their rank at the
    first N, the right (twin) axis with the players ordered by their rank at
    the last N. One line per player is drawn, coloured along the viridis
    colour map. Returns the matplotlib figure.
    """
    rank_df = average_rank_df(value).dropna(axis=1)
    Ns = list(rank_df.columns[1:])
    fig, ax1 = plt.subplots()

    by_first_N = rank_df.sort_values(Ns[0], ascending=False)
    first_labels = tuple(by_first_N["player"])
    ranks = [position + 1 for position in range(len(first_labels))]
    last_labels = list(rank_df.sort_values(Ns[-1], ascending=False)["player"])

    for axis, tick_labels in zip([ax1, ax1.twinx()], [first_labels, last_labels]):
        axis.set_ylim([min(ranks) - 3, max(ranks) + 3])
        axis.set_yticks(ranks)
        axis.set_yticklabels(tick_labels)
        axis.grid(b=False)

    color_indices = np.linspace(0, 1, len(last_labels))
    for color_index, (_, row) in zip(color_indices, by_first_N.iterrows()):
        # The first entry of the row is the player name; ranks are flipped
        # so that rank 1 is drawn at the top.
        heights = [len(ranks) + 1 - r for r in list(row)[1:]]
        ax1.plot(heights, c=matplotlib.cm.viridis(color_index))

    ax1.set_xticks(range(len(Ns)))
    ax1.set_xticklabels(list(Ns))
    ax1.set_xlim(0, len(Ns) - 1)
    ax1.set_title("Ranks of Players for {}".format(value))
    ax1.grid(b=False)
    fig.tight_layout()
    return fig
In [31]:
# Save one rank plot per quantity.
plt.rcParams['figure.figsize'] = 25, 30
rank_values = ['$p_1$', '$p_{N-1}$', '$p_{N/2}$']
for plot_file_label, value in zip(plot_file_labels, rank_values):
    rank_figure = plot_ranks(value=value)
    rank_figure.savefig("../img/average_rank_vs_population_size_{}.pdf".format(plot_file_label))
In [32]:
def get_correlation_coefficients(rank_df):
    """
    Correlation coefficients between the float64 columns of `rank_df`.

    Columns containing a missing value are dropped first, then only float64
    columns are kept. Returns a square frame, indexed and labelled by the
    remaining column names, with the coefficients rounded to 2 decimals.
    """
    float_columns = rank_df.dropna(axis=1).select_dtypes(include=['float64'])
    names = float_columns.columns
    # np.corrcoef treats every row as one variable, hence the transpose.
    coefficients = np.corrcoef(float_columns.values.T)
    return pd.DataFrame(coefficients, columns=names, index=list(names)).round(2)
In [33]:
# For every quantity: store the correlation coefficients between the rank
# columns as CSV and draw them as a heat map with a colour bar.
plt.rcParams['figure.figsize'] = 8, 8
rank_values = ['$p_1$', '$p_{N-1}$', '$p_{N/2}$']
for plot_file_label, value in zip(plot_file_labels, rank_values):
    temp_df = get_correlation_coefficients(average_rank_df(value))
    temp_df.to_csv("../data/correlation_coefficient_{}.csv".format(plot_file_label))

    fig = plt.figure()
    ax = fig.gca()
    im = ax.imshow(temp_df, cmap="viridis")

    # Dedicated axes to the right of the image for the colour bar.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size=1, pad=0.4)
    fig.colorbar(im, cax=cax)

    tick_positions = range(len(temp_df.index))
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(temp_df.index)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(temp_df.index)
    ax.set_title("{}".format(value))
    ax.grid(b=False)
    ax.patch.set_facecolor('None')
    fig.tight_layout()
    fig.savefig("../img/correlation_heatmap_{}.pdf".format(plot_file_label))
Investigate the change of rank:
In [34]:
def change_of_rank_df(N_init=2, value="$p_1$", last_N=14):
    """
    Follow the ranks of the top players at `N_init` across population sizes.

    The players are those returned by `rank_summary_df(value, N=N_init)`.
    The result has a "Player" column plus one column per N from 2 to
    `last_N` (even N only for "$p_{N/2}$") holding each player's rank of the
    mean of `value` at that N. Reads the notebook-level `main` frame.
    """
    step = 2 if value == "$p_{N/2}$" else 1
    df = pd.DataFrame(rank_summary_df(value, N=N_init)["Player"])
    for N in range(2, last_N + 1, step):
        mean_by_player = main[main["N"] == N].groupby(["player"])[value].mean()
        ranks = mean_by_player.rank(numeric_only=True, ascending=False)
        df[N] = list(ranks[df["Player"]])
    return df
In [35]:
# For every quantity, write a LaTeX table of ranks against population size
# made of: the top players at N = 2, the top players at N = last_N, the
# TF1-TF3 players and the ZD players; \midrule lines are inserted in between.
last_N = 14
ZD_players = [abbreviate(p) for p in players_of_interest if "ZD" in str(p)]
rank_values = ["$p_1$", "$p_{N-1}$", "$p_{N/2}$"]
for plot_file_label, value in zip(plot_file_labels, rank_values):
    average_ranks = average_rank_df(value)
    zd_df = (average_ranks[average_ranks["player"].isin(ZD_players)]
             .sort_values(2)
             .dropna(axis=1)
             .rename(columns={"player": "Player"}))
    tf_df = (average_ranks[average_ranks["player"].isin(["TF1", "TF2", "TF3"])]
             .sort_values(2)
             .dropna(axis=1)
             .rename(columns={"player": "Player"}))

    df_2 = change_of_rank_df(N_init=2, value=value, last_N=last_N)
    df_last = change_of_rank_df(N_init=last_N, value=value, last_N=last_N)

    df = (pd.concat([df_2, df_last, tf_df, zd_df])
          .drop_duplicates()
          .rename(columns={"Player": "Size"}))
    print(df)

    latex_list = clean_latex(df.to_latex(index=False)).splitlines()
    # Rule after the first group of rows (the 4 accounts for the lines that
    # precede the first data row).
    latex_list.insert(4 + len(df_2.index), "\\midrule")
    # TF players already listed among the top players are removed as
    # duplicates, so only the remaining ones form the TF group.
    top_players = pd.concat([df_2, df_last])["Player"]
    num_extra_tf = 3 - sum(top_players.isin(["TF1", "TF2", "TF3"]))
    latex_list.insert(-(2 + len(zd_df.index) + num_extra_tf), "\\midrule")
    latex_list.insert(-(2 + len(zd_df.index)), "\\midrule")

    with open("../tex/change_of_rank_{}.tex".format(plot_file_label), "w") as f:
        f.write('\n'.join(latex_list).replace(".0", ""))
In [36]:
# Abbreviated names of the analysed strategies, ordered by their full name.
player_names = [
    abbreviate(full_name)
    for full_name in sorted(
        str(p) for p in selected_players()
        if "length" not in p.classifier["makes_use_of"]
        and "Incorrect" not in abbreviate(p)
    )
]
def cooperation_heatmap(filename, player_names=player_names):
    """
    Draw the matrix stored in `filename` as a heat map and save it as a PDF.

    The file is read as a header-less CSV and must have shape
    (len(player_names), 200); rows are labelled with `player_names`. The PDF
    is written to ../img/, named after the file's base name with spaces,
    dots, commas and colons normalised.
    """
    matrix = np.array(pd.read_csv(filename, header=None))
    number_of_players = len(player_names)
    assert matrix.shape == (number_of_players, 200)

    fig, ax = plt.subplots()
    # Square figure whose side grows with the number of players.
    side = number_of_players / 4
    fig.set_size_inches(side, side)

    mat = ax.matshow(matrix, cmap="plasma")
    ax.set_yticks(range(number_of_players))
    ax.set_yticklabels(player_names)
    ax.set_xlabel("Rounds", fontsize=40)
    ax.tick_params(axis='both', which='both', labelsize=16)

    # Colour bar in its own axes to the right of the matrix.
    cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.2)
    cax.tick_params(labelsize=40)
    fig.colorbar(mat, cax=cax)
    fig.tight_layout()

    plot_file_name, _ = os.path.splitext(os.path.basename(filename))
    # Order matters: "__" is collapsed after spaces have become "_".
    for old, new in ((" ", "_"), (".", "-"), (",", ""), (":", ""), ("__", "_")):
        plot_file_name = plot_file_name.replace(old, new)
    fig.savefig("../img/{}.pdf".format(plot_file_name))
In [37]:
# Draw one heat map per cooperation array found in ../data.
cooperation_files = glob.glob("../data/cooperation_*_array.gz")
for cooperation_path in cooperation_files:
    cooperation_heatmap(cooperation_path)