実験の概要: README.md
利得表
<table align="center", style="text-align:center;">
In [1]:
#-*- encoding: utf-8 -*-
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
import scipy.stats as stats
pd.set_option('display.precision', 4)
import sys
sys.path.append('./user_strategies')
# 日本語対応
mpl.rcParams['font.family'] = 'Osaka'
import play as pl
from Iida_perfect_monitoring import Iida_pm
from Iida_imperfect_public import Iida_ipm
from Iida_imperfect_private import Iida_iprm
from kato import KatoStrategy
from ikegami_perfect import Self_Centered_perfect
from ikegami_imperfect_public import Self_Centered_public
from ikegami_imperfect_private import Self_Centered_private
from mhanami_Public_Strategy import PubStrategy
from mhanami_Imperfect_Public_Strategy import ImPubStrategy
from mhanami_Imperfect_Private_Strategy import ImPrivStrategy
from tsuyoshi import GrimTrigger
from gistfile1 import MyStrategy
from beeleb_Strategy import beeleb
from oyama import OyamaPerfectMonitoring, OyamaImperfectPublicMonitoring, OyamaImperfectPrivateMonitoring
from ogawa import ogawa
from yamagishi_impd import yamagishi
from kandori import *
In [2]:
import unittest
class TestStrategies(unittest.TestCase):
def setUp(self):
self.Strategies = [Iida_pm, Iida_ipm, Iida_iprm, KatoStrategy, Self_Centered_perfect, \
Self_Centered_public, Self_Centered_private, PubStrategy, ImPubStrategy, ImPrivStrategy, \
MyStrategy, beeleb, OyamaPerfectMonitoring, \
OyamaImperfectPublicMonitoring, OyamaImperfectPrivateMonitoring, \
ogawa, yamagishi, GrimTrigger, Strategy1, Strategy2, Strategy3, Strategy4, Strategy5,
Strategy6, Strategy7, Strategy8, Strategy9, Strategy10,
Strategy11, Strategy12, Strategy13, Strategy14, Strategy15,
Strategy16, Strategy17, Strategy18, Strategy19, Strategy20,
Strategy21, Strategy22, Strategy23, Strategy24, ] # ここに自作のclassを入れる
self.case1 = "Signal is empty(period 1)"
self.case2 = [0, 1]
self.case3 = [1, 0]
self.case4 = [0, 1, 0, 1, 0, 0, 1]
self.seed = 222
self.RandomState = np.random.RandomState(self.seed)
# case1を引数に渡してテスト
def test1(self):
print("testcase:", self.case1)
for Strategy in self.Strategies:
rst = Strategy(self.RandomState).play()
self.assertIsNotNone(rst, Strategy.__module__)
self.assertIn(rst, (0, 1), Strategy.__module__)
# case2を引数に渡してテスト
def test2(self):
print("testcase:", self.case2)
for Strategy in self.Strategies:
S = Strategy(self.RandomState)
for signal in self.case2:
rst = S.play()
S.get_signal(signal)
self.assertIsNotNone(rst, Strategy.__module__)
self.assertIn(rst, (0, 1), Strategy.__module__)
# case3を引数に渡してテスト
def test3(self):
print("testcase:", self.case3)
for Strategy in self.Strategies:
S = Strategy(self.RandomState)
for signal in self.case3:
rst = S.play()
S.get_signal(signal)
self.assertIsNotNone(rst, S.__module__)
self.assertIn(rst, (0, 1), S.__module__)
# case4を引数に渡してテスト
def test4(self):
print("testcase:", self.case4)
for Strategy in self.Strategies:
S = Strategy(self.RandomState)
for signal in self.case4:
rst = S.play()
S.get_signal(signal)
self.assertIsNotNone(rst, S.__module__)
self.assertIn(rst, (0, 1), S.__module__)
In [3]:
suite = unittest.TestLoader().loadTestsFromTestCase(TestStrategies)
unittest.TextTestRunner().run(suite)
Out[3]:
Test: OK
In [4]:
payoff = np.array([[4, 0], [5, 2]])
seed = 282
rs = np.random.RandomState(seed)
discount_v = 0.97
repeat = 1000
ts_length = rs.geometric(p=1-discount_v, size=1000)
In [5]:
print("基本統計量:")
print(pd.DataFrame(ts_length, columns=["ts_length"]).describe())
print("\n33.33期未満: {0}%".format(ts_length[ts_length <= 33].size / 10))
fig, ax = plt.subplots(figsize=(20, 5))
plt.title("1000セッションの期数の分布")
# actual histogram
plt.hist(ts_length, bins=np.max(ts_length)-1, color='#4488FF')
# theoretical cdf
x = np.arange(1, np.max(ts_length))
plt.plot(x, stats.geom.pmf(x, 1-discount_v)*1000, linewidth=2, color='green', label="theoretical cdf(average=33.33)")
mu = np.mean(ts_length)
sigma = np.var(ts_length)
plt.xlabel("ts_length")
plt.ylabel("number of session")
ax.text(35, 30, r'''$\mu$={0:.3f}, $\sigma^2$={1:.3f}'''.format(mu, sigma), ha = 'left', va = 'bottom', size=15)
ax.grid(True)
ax.axvline(x=mu, linewidth=2, color='red', label="average")
plt.legend()
plt.show()
In [6]:
trimmed_ts_length = np.sort(ts_length)[50:950]
print("基本統計量:")
print(pd.DataFrame(trimmed_ts_length, columns=["trimmed_ts_length"]).describe())
print("\n33.33期未満: {0}%".format(trimmed_ts_length[trimmed_ts_length <= 33].size / 10))
mu = np.mean(trimmed_ts_length)
sigma = np.var(trimmed_ts_length)
fig, ax = plt.subplots(figsize=(20, 5))
plt.title("900セッションの期数の分布")
# actual histogram
plt.hist(trimmed_ts_length, bins=np.max(trimmed_ts_length)-1, color='#4488FF')
# theoretical cdf
x = np.arange(1, np.max(trimmed_ts_length))
plt.plot(x, stats.geom.pmf(x, 1-discount_v)*1000, linewidth=2, color='green', label="theoretical cdf(average=33.33)")
plt.xlabel("trimmed_ts_length")
plt.ylabel("number of session")
ax.text(30, 30, r'''$\mu$={0:.3f}, $\sigma^2$={1:.3f}'''.format(mu, sigma), ha = 'left', va = 'bottom', size=14)
ax.grid(True)
ax.axvline(x=mu, linewidth=2, color='red')
plt.legend()
plt.show()
2〜100期の範囲になる。
結果の生データ(csv)は contest1/data
戦略はuser_strategies
戦略のオートマトンはcontest1/automaton1.pdf
In [7]:
strategies = [Iida_pm, PubStrategy, KatoStrategy, Self_Centered_perfect,
GrimTrigger, MyStrategy, beeleb, OyamaPerfectMonitoring, ogawa, yamagishi]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=None, ts_length=ts_length, repeat=1000)
game.play(mtype="perfect", random_seed=seed, record=False)
| Datetime | 2015-12-04-18-23-58 | |||||
|---|---|---|---|---|---|---|
| Monitoring type | perfect | |||||
| RandomSeed | 282 | |||||
| Repeats | 1000 | |||||
| Average ts_length | 32.856 | |||||
| Number of strategies | 10 | |||||
| Str_numbers | Strategy name | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | 備考 |
| 10 | yamagishi_impd.yamagishi | 3.785542333 | 1 | 3.69566526 | 1 | TFT |
| 4 | ikegami_perfect.Self_Centered_perfect | 3.709163441 | 2 | 3.569823202 | 2 | 30% |
| 7 | beeleb_Strategy.beeleb | 3.707132242 | 3 | 3.56862944 | 3 | |
| 2 | mhanami_Public_Strategy.PubStrategy | 3.695745756 | 4 | 3.560563942 | 4 | TFT' |
| 8 | oyama.OyamaPerfectMonitoring | 3.695585282 | 5 | 3.560097259 | 5 | GT |
| 6 | gistfile1.MyStrategy | 3.689240046 | 6 | 3.521959459 | 6 | TFT' |
| 1 | Iida_perfect_monitoring.Iida_pm | 3.638486141 | 7 | 3.386765144 | 8 | |
| 5 | tsuyoshi.GrimTrigger | 3.617644214 | 8 | 3.408783784 | 7 | TFT' |
| 9 | ogawa.ogawa | 3.479693556 | 9 | 3.13531775 | 9 | |
| 3 | kato.KatoStrategy | 3.046090794 | 10 | 2.713737386 | 10 | |
TFT = Tit for Tat, GT = GrimTrigger, 30% = {過去全てのシグナルが30%以上BならD、そうでなければCを返す}
TFT' = Tit for Tatの亜種(確率を分岐条件に加える, stateを増やすなど)
全体的に平均利得が4に近い(= 協調がかなりの程度達成されている)。
TFTが圧倒的に高い利得を得た一方で、定期的に必ずDを出す戦略(戦略9, 3)は平均利得が低くなっている。
In [8]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest1/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# 行: プレイヤー, 列: 1000*2セッション分の平均利得
average_matrix = np.zeros((rounds*(strategies-1), strategies), dtype=float)
for s in range(1, strategies+1):
for i, opponent in enumerate(df[str(s)].columns.values):
average_matrix[i*rounds:(i+1)*rounds, s-1] = df[str(s)][str(opponent)]
averages = np.zeros(strategies, dtype=float)
stds = np.zeros(strategies, dtype=float)
ranking = np.zeros(strategies, dtype=int)
for i in range(strategies):
averages[i] = average_matrix[:, i].mean()
stds[i] = average_matrix[:, i].std()
ranking = np.argsort(averages)[::-1]+1
fig, ax = plt.subplots(figsize=(20, 12))
bp = ax.boxplot(average_matrix, 0, '')
plt.grid()
plt.xlabel('戦略番号')
plt.ylabel('1セッションの平均利得')
ax.set_xlim([0, strategies+0.5])
ax.set_ylim([-0.1, 5.8])
plt.title('戦略別, 全セッションの平均利得の分布')
ax.text(0.4, 5.3, "ranking\nave\nstd", ha = 'center', va = 'center', color="black", size=14)
for i in range(strategies):
ax.text(i+1, 5.3, "{0:.0f}\n{1:.3f}\n{2:.3f}"
.format(np.where(ranking == i+1)[0][0]+1, averages[i], stds[i]), ha = 'center', va = 'center', color="black", size=14)
plt.show()
| str number | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| rank | 7 | 4 | 10 | 2 | 8 | 6 | 3 | 5 | 9 | 1 |
| count | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 |
| mean | 3.638486 | 3.695746 | 3.046091 | 3.709163 | 3.617644 | 3.68924 | 3.707132 | 3.695585 | 3.479694 | 3.785542 |
| std | 0.649774 | 0.604642 | 0.874775 | 0.58606 | 0.7979 | 0.634951 | 0.588227 | 0.604742 | 0.672512 | 0.469952 |
| min | 1.801394 | 2 | 2.003484 | 2 | 0.828571 | 1.41791 | 2 | 2 | 2 | 2 |
| 25% | 3.3 | 4 | 2.142857 | 4 | 4 | 4 | 4 | 4 | 2.866667 | 4 |
| 50% | 4 | 4 | 3 | 4 | 4 | 4 | 4 | 4 | 3.892857 | 4 |
| 75% | 4 | 4 | 3.931034 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
| max | 4.466899 | 4 | 4.5 | 4 | 4 | 4 | 4 | 4 | 4.4 | 4 |
1位のTFTが最も標準偏差が小さい。全体的に、分散が小さいほど高い順位となった。
In [9]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest1/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜100期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
fig, ax = plt.subplots(figsize=(20, 10))
plt.title("average payoff trend")
plt.xlabel("ts_length")
plt.ylabel("average payoff")
t_list = [i for i in range(1, max_ts+1)]
for s in range(1, strategies+1):
if s in [10, 8, 4]:
pass
else:
average_list = average_matrix[s-1]
plt.plot(t_list, average_list, color='#bbbbbb')
plt.plot(t_list, average_matrix[10-1], color='red', linewidth=2, label="10 (TFT)")
plt.plot(t_list, average_matrix[8-1], color='blue', linewidth=2, label="8 (GrimTrigger)")
plt.plot(t_list, average_matrix[4-1], color='green', linewidth=2, label="4 (30%)")
plt.legend()
plt.show()
TFTは期数の長いセッションで、他の戦略に比べて協調に成功している。
結果の生データ(csv)は contest2/data
戦略はuser_strategies
戦略のオートマトンはcontest2/automaton2.pdf
In [10]:
# プロジェクトが成功か失敗かを返す
def public_signal(actions, random_state):
prob = random_state.uniform()
if actions[0] == 0 and actions[1] == 0:
return 0 if prob < 0.9 else 1
elif (actions[0] == 0 and actions[1] == 1) or (actions[0] == 1 and actions[1] == 0):
return 0 if prob < 0.5 else 1
elif actions[0] == 1 and actions[1] == 1:
return 0 if prob < 0.2 else 1
else:
raise ValueError
strategies = [Iida_ipm, ImPubStrategy, KatoStrategy, Self_Centered_public, GrimTrigger,
MyStrategy, beeleb, OyamaImperfectPublicMonitoring, ogawa, yamagishi]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=public_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="public", random_seed=seed, record=False)
| Datetime | 2015-12-04-19-27-41 | |||||
|---|---|---|---|---|---|---|
| Monitoring type | public | |||||
| RandomSeed | 282 | |||||
| Repeats | 1000 | |||||
| Average ts_length | 32.856 | |||||
| Number of strategies | 10 | |||||
| Str_numbers | Strategy name | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | 備考 |
| 2 | mhanami_Imperfect_Public_Strategy.ImPubStrategy | 3.426505902 | 1 | 3.085798636 | 1 | ALLD |
| 3 | kato.KatoStrategy | 3.300688557 | 2 | 3.082721235 | 2 | |
| 4 | ikegami_imperfect_public.Self_Centered_public | 3.204767877 | 3 | 3.024595541 | 4 | 25% |
| 8 | oyama.OyamaImperfectPublicMonitoring | 3.164308602 | 4 | 3.06851446 | 3 | GT' |
| 1 | Iida_imperfect_public.Iida_ipm | 3.15370844 | 5 | 2.949672646 | 5 | |
| 10 | yamagishi_impd.yamagishi | 3.060373179 | 6 | 2.824435922 | 7 | TFT |
| 9 | ogawa.ogawa | 3.02123959 | 7 | 2.910746896 | 6 | |
| 5 | tsuyoshi.GrimTrigger | 2.873939966 | 8 | 2.566333225 | 8 | TFT' |
| 7 | beeleb_Strategy.beeleb | 2.80155086 | 9 | 2.545187079 | 9 | |
| 6 | gistfile1.MyStrategy | 2.781473904 | 10 | 2.45014271 | 10 | TFT' |
戦略2(ALLD)と戦略3(定期的にDを出す戦略)が上位となった。
In [11]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest2/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# 行: プレイヤー, 列: 1000*2セッション分の平均利得
average_matrix = np.zeros((rounds*(strategies-1), strategies), dtype=float)
for s in range(1, strategies+1):
for i, opponent in enumerate(df[str(s)].columns.values):
average_matrix[i*rounds:(i+1)*rounds, s-1] = df[str(s)][str(opponent)]
averages = np.zeros(strategies, dtype=float)
stds = np.zeros(strategies, dtype=float)
ranking = np.zeros(strategies, dtype=int)
for i in range(strategies):
averages[i] = average_matrix[:, i].mean()
stds[i] = average_matrix[:, i].std()
ranking = np.argsort(averages)[::-1]+1
fig, ax = plt.subplots(figsize=(20, 12))
bp = ax.boxplot(average_matrix, 0, '')
plt.grid()
plt.xlabel('戦略番号')
plt.ylabel('1セッションの平均利得')
ax.set_xlim([0, strategies+0.5])
ax.set_ylim([-0.1, 5.8])
plt.title('戦略別, 全セッションの平均利得の分布')
ax.text(0.4, 5.3, "ranking\nave\nstd", ha = 'center', va = 'center', color="black", size=14)
for i in range(strategies):
ax.text(i+1, 5.3, "{0:.0f}\n{1:.3f}\n{2:.3f}"
.format(np.where(ranking == i+1)[0][0]+1, averages[i], stds[i]), ha = 'center', va = 'center', color="black", size=14)
plt.show()
| str number | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| rank | 5 | 1 | 2 | 3 | 8 | 10 | 9 | 4 | 7 | 6 |
| count | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 |
| mean | 3.153708 | 3.426506 | 3.300689 | 3.204768 | 2.87394 | 2.781474 | 2.801551 | 3.164309 | 3.02124 | 3.060373 |
| std | 1.068773 | 0.989293 | 0.981721 | 0.932929 | 1.311979 | 1.35814 | 1.319016 | 1.042236 | 1.180943 | 1.091448 |
| min | 0 | 2.010453 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 25% | 2.215385 | 2.545455 | 2.496503 | 2.425827 | 1.653061 | 1.555556 | 1.672578 | 2.406039 | 2 | 2 |
| 50% | 3.545455 | 3.3125 | 3.416667 | 3.5 | 3.44 | 3.384615 | 3.353095 | 3.563063 | 3.532292 | 3.558846 |
| 75% | 4 | 4.25 | 4.25 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
| max | 4.9 | 5 | 4.97619 | 4.97561 | 4.857143 | 4.333333 | 4.103448 | 4.981481 | 4.659574 | 4.8 |
実験1とは異なり, 分散と順位の間に明確な関係は見られない。
In [12]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest2/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜100期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
fig, ax = plt.subplots(figsize=(20, 10))
plt.title("average payoff trend")
plt.xlabel("ts_length")
plt.ylabel("average payoff")
t_list = [i for i in range(1, max_ts+1)]
for s in range(1, strategies+1):
if s in [2, 8, 4, 10]:
pass
else:
average_list = average_matrix[s-1]
plt.plot(t_list, average_list, color='#bbbbbb')
plt.plot(t_list, average_matrix[2-1], color='red', linewidth=2, label="2 (ALLD)")
plt.plot(t_list, average_matrix[4-1], color='green', linewidth=2, label="4 (25%)")
plt.plot(t_list, average_matrix[8-1], color='blue', linewidth=2, label="8 (GT’)")
plt.plot(t_list, average_matrix[10-1], color='orange', linewidth=2, label="10 (TFT)")
plt.legend()
plt.show()
上位の戦略は、期数の短長にかかわらず、安定した平均利得をえている。
ALLDは特に短い期数のセッションでの平均利得が大きく、1位になった要因だと考えられる。
結果の生データ(csv)は contest3/data
戦略はuser_strategies
戦略のオートマトンはcontest3/automaton3.pdf
In [13]:
# 「相手の」シグナルが協調か攻撃かを(ノイズ付きで)返す
def private_signal(actions, random_state):
pattern = [[0, 0], [0, 1], [1, 0], [1, 1]]
# 例えば実際の行動が(0, 1)なら、シグナルは(1, 0)である可能性が最も高い
signal_probs = [[.9, .02, .02, .06], [.02, .06, .9, .02], [.02, .9, .06, .02], [.06, .02, .02, .9]]
p = random_state.uniform()
if actions[0] == 0 and actions[1] == 0:
return [0, 0] if p < 0.9 else [0, 1] if p < 0.92 else [1, 0] if p < 0.94 else [1, 1]
elif actions[0] == 0 and actions[1] == 1:
return [1, 0] if p < 0.9 else [0, 0] if p < 0.92 else [1, 1] if p < 0.94 else [0, 1]
elif actions[0] == 1 and actions[1] == 0:
return [0, 1] if p < 0.9 else [1, 1] if p < 0.92 else [0, 0] if p < 0.94 else [1, 0]
elif actions[0] == 1 and actions[1] == 1:
return [1, 1] if p < 0.9 else [1, 0] if p < 0.92 else [0, 1] if p < 0.94 else [0, 0]
else:
raise ValueError
strategies = [Iida_iprm, ImPrivStrategy, KatoStrategy, Self_Centered_private, GrimTrigger,
MyStrategy, beeleb, OyamaImperfectPrivateMonitoring, ogawa, yamagishi]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
| Datetime | 2015-12-05-00-14-04 | |||||
|---|---|---|---|---|---|---|
| Monitoring type | private | |||||
| RandomSeed | 282 | |||||
| Repeats | 1000 | |||||
| Average ts_length | 32.856 | |||||
| Number of strategies | 10 | |||||
| Str_numbers | Strategy name | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | 備考 |
| 7 | beeleb_Strategy.beeleb | 3.458461687 | 1 | 3.293973027 | 1 | |
| 10 | yamagishi_impd.yamagishi | 3.445953725 | 2 | 3.239141168 | 3 | TFT |
| 2 | mhanami_Imperfect_Private_Strategy.ImPrivStrategy | 3.445605137 | 3 | 3.256924154 | 2 | 2T2FT |
| 6 | gistfile1.MyStrategy | 3.426852855 | 4 | 3.213336647 | 4 | TFT' |
| 8 | oyama.OyamaImperfectPrivateMonitoring | 3.405064024 | 5 | 3.178198469 | 5 | TFT' |
| 1 | Iida_imperfect_private.Iida_iprm | 3.389575102 | 6 | 3.114596015 | 7 | |
| 9 | ogawa.ogawa | 3.389554064 | 7 | 3.095465398 | 8 | |
| 4 | ikegami_imperfect_private.Self_Centered_private | 3.374520784 | 8 | 3.137879433 | 6 | 20% |
| 5 | tsuyoshi.GrimTrigger | 3.244853934 | 9 | 2.928183251 | 9 | TFT' |
| 3 | kato.KatoStrategy | 3.240895862 | 10 | 2.899218475 | 10 | |
In [14]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest3/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# 行: プレイヤー, 列: 1000*2セッション分の平均利得
average_matrix = np.zeros((rounds*(strategies-1), strategies), dtype=float)
for s in range(1, strategies+1):
for i, opponent in enumerate(df[str(s)].columns.values):
average_matrix[i*rounds:(i+1)*rounds, s-1] = df[str(s)][str(opponent)]
averages = np.zeros(strategies, dtype=float)
stds = np.zeros(strategies, dtype=float)
ranking = np.zeros(strategies, dtype=int)
for i in range(strategies):
averages[i] = average_matrix[:, i].mean()
stds[i] = average_matrix[:, i].std()
ranking = np.argsort(averages)[::-1]+1
fig, ax = plt.subplots(figsize=(20, 12))
bp = ax.boxplot(average_matrix, 0, '')
plt.grid()
plt.xlabel('戦略番号')
plt.ylabel('1セッションの平均利得')
ax.set_xlim([0, strategies+0.5])
ax.set_ylim([-0.1, 5.8])
plt.title('戦略別, 全セッションの平均利得の分布')
ax.text(0.4, 5.3, "ranking\nave\nstd", ha = 'center', va = 'center', color="black", size=14)
for i in range(strategies):
ax.text(i+1, 5.3, "{0:.0f}\n{1:.3f}\n{2:.3f}"
.format(np.where(ranking == i+1)[0][0]+1, averages[i], stds[i]), ha = 'center', va = 'center', color="black", size=14)
plt.show()
| str number | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| rank | 6 | 3 | 10 | 8 | 9 | 4 | 1 | 5 | 7 | 2 |
| count | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 | 18000 |
| mean | 3.389575 | 3.445605 | 3.240896 | 3.374521 | 3.244854 | 3.426853 | 3.458462 | 3.405064 | 3.389554 | 3.445954 |
| std | 0.767754 | 0.693915 | 0.811377 | 0.70892 | 0.901283 | 0.80159 | 0.729059 | 0.720129 | 0.714496 | 0.699362 |
| min | 1.230769 | 1.2 | 1.416667 | 1.333333 | 0.571429 | 0.615385 | 0.666667 | 0.571429 | 0.8 | 1.333333 |
| 25% | 2.875 | 2.857143 | 2.526316 | 2.707154 | 2.695652 | 3 | 2.962963 | 2.84 | 2.792308 | 2.952381 |
| 50% | 3.666667 | 3.911111 | 3.195387 | 3.666667 | 3.607143 | 3.947114 | 3.917526 | 3.758621 | 3.652618 | 3.793103 |
| 75% | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
| max | 4.8 | 4.4 | 4.8 | 4.857143 | 4.8 | 4.25 | 4.038462 | 4.8 | 4.428571 | 4.5 |
In [15]:
rounds = 1000 * 2
strategies = 10
max_ts = 100
# 読み込み
df = pd.read_csv('./contest3/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜100期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
fig, ax = plt.subplots(figsize=(20, 10))
plt.title("average payoff trend")
plt.xlabel("ts_length")
plt.ylabel("average payoff")
t_list = [i for i in range(1, max_ts+1)]
for s in range(1, strategies+1):
if s in [2, 7, 4, 10]:
pass
else:
average_list = average_matrix[s-1]
plt.plot(t_list, average_list, color='#bbbbbb')
plt.plot(t_list, average_matrix[7-1], color='red', linewidth=2, label="7")
plt.plot(t_list, average_matrix[10-1], color='orange', linewidth=2, label="10 (TFT)")
plt.plot(t_list, average_matrix[2-1], color='blue', linewidth=2, label="2 (2T2FT)")
plt.plot(t_list, average_matrix[4-1], color='green', linewidth=2, label="4 (20%)")
plt.legend()
plt.show()
期数が長くなるに従って、協調がしづらくなっていることがわかる。TFT同士の対戦では、このようなことが一般に起こる(後述)
結果の生データ(csv)は contest4/data
戦略は user_strategies
戦略のオートマトンは contest4/automaton4.pdf
神取ゼミの元実験と尾山ゼミでの再実験の比較(各対戦毎のセッション平均利得の違い) contest4/神取ゼミ実験_尾山ゼミ再実験比較.xlsx
In [16]:
# 「相手の」シグナルが協調か攻撃かを(ノイズ付きで)返す
def private_signal(actions, random_state):
pattern = [[0, 0], [0, 1], [1, 0], [1, 1]]
# 例えば実際の行動が(0, 1)なら、シグナルは(1, 0)である可能性が最も高い
signal_probs = [[.9, .02, .02, .06], [.02, .06, .9, .02], [.02, .9, .06, .02], [.06, .02, .02, .9]]
p = random_state.uniform()
if actions[0] == 0 and actions[1] == 0:
return [0, 0] if p < 0.9 else [0, 1] if p < 0.92 else [1, 0] if p < 0.94 else [1, 1]
elif actions[0] == 0 and actions[1] == 1:
return [1, 0] if p < 0.9 else [0, 0] if p < 0.92 else [1, 1] if p < 0.94 else [0, 1]
elif actions[0] == 1 and actions[1] == 0:
return [0, 1] if p < 0.9 else [1, 1] if p < 0.92 else [0, 0] if p < 0.94 else [1, 0]
elif actions[0] == 1 and actions[1] == 1:
return [1, 1] if p < 0.9 else [1, 0] if p < 0.92 else [0, 1] if p < 0.94 else [0, 0]
else:
raise ValueError
strategies = [Strategy1, Strategy2, Strategy3, Strategy4, Strategy5,
Strategy6, Strategy7, Strategy8, Strategy9, Strategy10,
Strategy11, Strategy12, Strategy13, Strategy14, Strategy15,
Strategy16, Strategy17, Strategy18, Strategy19, Strategy20,
Strategy21, Strategy22, Strategy23, Strategy24]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
| Datetime | 2015-12-05-01-07-37 | |||||
|---|---|---|---|---|---|---|
| Monitoring type | private | |||||
| RandomSeed | 282 | |||||
| Repeats | 1000 | |||||
| Average ts_length | 32.856 | |||||
| Number of strategies | 24 | |||||
| Str_numbers | Strategy name | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(trimmed) | 備考 |
| 18 | kandori.Strategy18 | 3.35352416 | 1 | 3.219810292 | 1 | WSLS' |
| 13 | kandori.Strategy13 | 3.326308014 | 2 | 3.182248494 | 2 | CCDDDD |
| 22 | kandori.Strategy22 | 3.259068663 | 3 | 3.121244482 | 4 | |
| 14 | kandori.Strategy14 | 3.258886509 | 4 | 3.122727237 | 3 | WSLS' |
| 1 | kandori.Strategy1 | 3.256299103 | 5 | 3.132024724 | 5 | |
| 3 | kandori.Strategy3 | 3.240387724 | 6 | 3.082433491 | 6 | WSLS' |
| 21 | kandori.Strategy21 | 3.238405281 | 7 | 3.083776638 | 7 | WSLS' |
| 2 | kandori.Strategy2 | 3.215812884 | 8 | 3.054822228 | 9 | WSLS |
| 16 | kandori.Strategy16 | 3.215812884 | 9 | 3.054822228 | 10 | WSLS |
| 17 | kandori.Strategy17 | 3.215547504 | 10 | 3.063675088 | 8 | TFT' |
| 19 | kandori.Strategy19 | 3.213334955 | 11 | 3.06115156 | 11 | TFT |
| 6 | kandori.Strategy6 | 3.197763649 | 12 | 3.056192503 | 12 | WSLS' |
| 12 | kandori.Strategy12 | 3.197073568 | 13 | 3.045809911 | 16 | |
| 20 | kandori.Strategy20 | 3.192768288 | 14 | 3.020367533 | 13 | WSLS' |
| 4 | kandori.Strategy4 | 3.191465329 | 15 | 3.086214152 | 14 | |
| 23 | kandori.Strategy23 | 3.188569289 | 16 | 3.06617612 | 15 | |
| 7 | kandori.Strategy7 | 3.166979223 | 17 | 3.001625671 | 17 | TFT' |
| 15 | kandori.Strategy15 | 3.161225612 | 18 | 2.983885545 | 19 | WSLS' |
| 11 | kandori.Strategy11 | 3.159787981 | 19 | 3.004255063 | 18 | TFT' |
| 24 | kandori.Strategy24 | 3.158548933 | 20 | 2.940998137 | 21 | |
| 10 | kandori.Strategy10 | 3.157508886 | 21 | 2.988733446 | 20 | TFT' |
| 8 | kandori.Strategy8 | 3.121529725 | 22 | 3.104081314 | 22 | HIST |
| 9 | kandori.Strategy9 | 3.088360193 | 23 | 2.962527525 | 24 | STFT |
| 5 | kandori.Strategy5 | 3.072941197 | 24 | 2.902704555 | 23 | |
CCDDDD: 最初2期はC, それ以降はDを出す戦略
STFT: 最初にDを出すTit for Tat
HIST: 過去n回以上シグナルBが出た場合はD, それ以外はCを出す戦略
WSLS: Win Stay Lose Shift. オートマトンで書くと
となる。
WSLS': WSLSに確率や状態を追加したもの.
| 順位 | 本実験 | 再実験 | ||
|---|---|---|---|---|
| 戦略 | 利得 | 戦略 | 利得 | |
| 1 | 18 | 3.356 | 18 | 3.354 |
| 2 | 13 | 3.316 | 13 | 3.326 |
| 3 | 22 | 3.263 | 22 | 3.259 |
| 4 | 14 | 3.260 | 14 | 3.259 |
| 5 | 1 | 3.255 | 1 | 3.256 |
| 6 | 3 | 3.238 | 3 | 3.240 |
| 7 | 21 | 3.227 | 21 | 3.238 |
| 8 | 16 | 3.217 | 2 | 3.216 |
| 9 | 19 | 3.217 | 16 | 3.216 |
| 10 | 2 | 3.217 | 17 | 3.216 |
| 11 | 17 | 3.215 | 19 | 3.213 |
| 12 | 6 | 3.205 | 6 | 3.198 |
| 13 | 4 | 3.192 | 12 | 3.197 |
| 14 | 23 | 3.190 | 20 | 3.193 |
| 15 | 12 | 3.187 | 4 | 3.191 |
| 16 | 20 | 3.187 | 23 | 3.189 |
| 17 | 11 | 3.164 | 7 | 3.167 |
| 18 | 7 | 3.161 | 15 | 3.161 |
| 19 | 15 | 3.151 | 11 | 3.160 |
| 20 | 10 | 3.148 | 24 | 3.159 |
| 21 | 24 | 3.140 | 10 | 3.158 |
| 22 | 8 | 3.129 | 8 | 3.122 |
| 23 | 9 | 3.084 | 9 | 3.088 |
| 24 | 5 | 3.048 | 5 | 3.073 |
In [17]:
rounds = 1000 * 2
strategies = 24
max_ts = 100
# 読み込み
df = pd.read_csv('./contest4/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# 行: プレイヤー, 列: 1000*2セッション分の平均利得
average_matrix = np.zeros((rounds*(strategies-1), strategies), dtype=float)
for s in range(1, strategies+1):
for i, opponent in enumerate(df[str(s)].columns.values):
average_matrix[i*rounds:(i+1)*rounds, s-1] = df[str(s)][str(opponent)]
averages = np.zeros(strategies, dtype=float)
stds = np.zeros(strategies, dtype=float)
ranking = np.zeros(strategies, dtype=int)
for i in range(strategies):
averages[i] = average_matrix[:, i].mean()
stds[i] = average_matrix[:, i].std()
ranking = np.argsort(averages)[::-1]+1
fig, ax = plt.subplots(figsize=(22, 12))
bp = ax.boxplot(average_matrix, 0, '')
plt.grid()
plt.xlabel('戦略番号')
plt.ylabel('1セッションの平均利得')
ax.set_xlim([0, strategies+0.5])
ax.set_ylim([-0.1, 5.8])
plt.title('戦略別, 全セッションの平均利得の分布')
ax.text(0.1, 5.3, "ranking\nave\nstd", ha = 'center', va = 'center', color="black", size=14)
for i in range(strategies):
ax.text(i+1, 5.3, "{0:.0f}\n{1:.3f}\n{2:.3f}"
.format(np.where(ranking == i+1)[0][0]+1, averages[i], stds[i]), ha = 'center', va = 'center', color="black", size=14)
plt.show()
| str number | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ranking | 5 | 8 | 6 | 15 | 24 | 12 | 17 | 22 | 23 | 21 | 19 | 13 |
| count | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 |
| mean | 3.256299 | 3.215813 | 3.240388 | 3.191465 | 3.072941 | 3.197764 | 3.166979 | 3.12153 | 3.08836 | 3.157509 | 3.159788 | 3.197074 |
| std | 0.837516 | 0.879935 | 0.850714 | 0.855328 | 0.974213 | 0.939815 | 0.765376 | 0.953146 | 0.818489 | 0.76951 | 0.845308 | 0.690475 |
| min | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 25% | 2.694303 | 2.705882 | 2.704918 | 2.658537 | 2.666667 | 2.864865 | 2.6 | 2.6 | 2.4 | 2.6 | 2.515152 | 2.7 |
| 50% | 3.548387 | 3.541918 | 3.5625 | 3.466667 | 3.303571 | 3.52381 | 3.210526 | 3.416667 | 2.9375 | 3.195122 | 3.378078 | 3.194444 |
| 75% | 4 | 3.964286 | 3.969697 | 4 | 4 | 4 | 3.947368 | 4 | 3.777778 | 3.923077 | 4 | 3.545455 |
| max | 4.923077 | 4.916667 | 4.909091 | 4.166667 | 4.25 | 4.777778 | 4.75 | 4.104895 | 5 | 4.75 | 4.333333 | 4.777778 |
| str number | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ranking | 2 | 4 | 18 | 9 | 10 | 1 | 11 | 14 | 7 | 3 | 16 | 20 |
| count | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 | 46000 |
| mean | 3.326308 | 3.258887 | 3.161226 | 3.215813 | 3.215548 | 3.353524 | 3.213335 | 3.192768 | 3.238405 | 3.259069 | 3.188569 | 3.158549 |
| std | 0.714311 | 0.847242 | 0.874287 | 0.879935 | 0.753468 | 0.742637 | 0.748892 | 0.878785 | 0.870167 | 0.896169 | 0.963549 | 0.937062 |
| min | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 25% | 2.75 | 2.875 | 2.68265 | 2.705882 | 2.661578 | 2.892204 | 2.611111 | 2.692308 | 2.769231 | 2.95 | 2.846154 | 2.522727 |
| 50% | 3.466667 | 3.55 | 3.375 | 3.541918 | 3.333333 | 3.571429 | 3.333333 | 3.482759 | 3.555556 | 3.587932 | 3.533333 | 3.325581 |
| 75% | 3.97619 | 4 | 3.849486 | 3.964286 | 4 | 4 | 4 | 3.909091 | 4 | 3.965517 | 4 | 3.742424 |
| max | 4.875 | 4.916667 | 4.909091 | 4.916667 | 4.4 | 4.923077 | 4.666667 | 4.916667 | 4.916667 | 4.916667 | 4.2 | 5 |
In [18]:
rounds = 1000 * 2
strategies = 24
max_ts = 100
# 読み込み
df = pd.read_csv('./contest4/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜100期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
fig, ax = plt.subplots(figsize=(20, 10))
plt.title("average payoff trend")
plt.xlabel("ts_length")
plt.ylabel("average payoff")
t_list = [i for i in range(1, max_ts+1)]
for s in range(1, strategies+1):
if s in [18, 13, 2, 19, 9]:
pass
else:
average_list = average_matrix[s-1]
plt.plot(t_list, average_list, color='#bbbbbb')
plt.plot(t_list, average_matrix[18-1], color='red', linewidth=2, label="18 (WSLS’)")
plt.plot(t_list, average_matrix[13-1], color='orange', linewidth=2, label="13 (CCDDDD)")
plt.plot(t_list, average_matrix[2-1], color='blue', linewidth=2, label="2 (WSLS)")
plt.plot(t_list, average_matrix[19-1], color='green', linewidth=2, label="19 (TFT)")
plt.plot(t_list, average_matrix[9-1], color='purple', linewidth=2, label="9 (STFT)")
plt.legend()
plt.show()
1位になったWSLS'(Strategy18)は、WSLSをよりALLDに強くしたもの(後述)。2位はALLD。
Strategy18:
In [19]:
def trim_mean(ts_length, aves, width):
size = ts_length.size
hist = {}
for t in ts_length:
hist[t] = hist.get(t, 0) + 1
lower_b = size * (1-width) / 2
upper_b = size * (1 - (1-width)/2)
s = 0
total = 0
for ts, num in sorted(hist.items()):
old_s = s
s += num
if old_s <= lower_b < s:
total += (s-lower_b) * aves[ts-1]
elif old_s <= upper_b < s:
total += (upper_b-old_s+1) * aves[ts-1]
elif lower_b <= s <= upper_b:
total += num * aves[ts-1]
elif s > upper_b:
break
return total / (size * width)
rounds = 1000 * 2
strategies = 24
max_ts = ts_length.max()
# 読み込み
df = pd.read_csv('./contest4/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜max期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
for i in range(strategies):
print(trim_mean(ts_length, average_matrix[i], 0.9))
| Str_numbers | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | Average(90% trimmed) | Rank(trimmed) | 備考 |
|---|---|---|---|---|---|---|---|
| 18 | 3.35352416 | 1 | 3.219810292 | 1 | 3.354223602 | 1 | WSLS' |
| 13 | 3.326308014 | 2 | 3.182248494 | 2 | 3.327682945 | 2 | CCDDDD |
| 22 | 3.259068663 | 3 | 3.121244482 | 5 | 3.254935011 | 4 | |
| 14 | 3.258886509 | 4 | 3.122727237 | 4 | 3.256160683 | 3 | WSLS' |
| 1 | 3.256299103 | 5 | 3.132024724 | 3 | 3.250285117 | 5 | |
| 3 | 3.240387724 | 6 | 3.082433491 | 9 | 3.236639676 | 6 | WSLS' |
| 21 | 3.238405281 | 7 | 3.083776638 | 8 | 3.235193757 | 7 | WSLS' |
| 2 | 3.215812884 | 8 | 3.054822228 | 14 | 3.210963579 | 9 | WSLS |
| 16 | 3.215812884 | 9 | 3.054822228 | 15 | 3.210963579 | 10 | WSLS |
| 17 | 3.215547504 | 10 | 3.063675088 | 11 | 3.21302084 | 8 | TFT' |
| 19 | 3.213334955 | 11 | 3.06115156 | 12 | 3.207154339 | 11 | TFT |
| 6 | 3.197763649 | 12 | 3.056192503 | 13 | 3.193193311 | 12 | WSLS' |
| 12 | 3.197073568 | 13 | 3.045809911 | 16 | 3.178046947 | 16 | |
| 20 | 3.192768288 | 14 | 3.020367533 | 17 | 3.186697798 | 13 | WSLS' |
| 4 | 3.191465329 | 15 | 3.086214152 | 7 | 3.181888575 | 14 | |
| 23 | 3.188569289 | 16 | 3.06617612 | 10 | 3.180589184 | 15 | |
| 7 | 3.166979223 | 17 | 3.001625671 | 19 | 3.158844309 | 17 | TFT' |
| 15 | 3.161225612 | 18 | 2.983885545 | 21 | 3.151458884 | 19 | WSLS' |
| 11 | 3.159787981 | 19 | 3.004255063 | 18 | 3.154021739 | 18 | TFT' |
| 24 | 3.158548933 | 20 | 2.940998137 | 23 | 3.101466013 | 21 | |
| 10 | 3.157508886 | 21 | 2.988733446 | 20 | 3.148909213 | 20 | TFT' |
| 8 | 3.121529725 | 22 | 3.104081314 | 6 | 3.096718772 | 22 | HIST |
| 9 | 3.088360193 | 23 | 2.962527525 | 22 | 3.030839327 | 24 | STFT |
| 5 | 3.072941197 | 24 | 2.902704555 | 24 | 3.063080194 | 23 |
ほぼセッションベース平均と同じ。
結果の生データ(csv)は contest5/data
戦略は user_strategies
戦略のオートマトンは contest5/automaton5.pdf
In [20]:
# 「相手の」シグナルが協調か攻撃かを(ノイズ付きで)返す
def private_signal(actions, random_state):
pattern = [[0, 0], [0, 1], [1, 0], [1, 1]]
# 例えば実際の行動が(0, 1)なら、シグナルは(1, 0)である可能性が最も高い
signal_probs = [[.9, .02, .02, .06], [.02, .06, .9, .02], [.02, .9, .06, .02], [.06, .02, .02, .9]]
p = random_state.uniform()
if actions[0] == 0 and actions[1] == 0:
return [0, 0] if p < 0.9 else [0, 1] if p < 0.92 else [1, 0] if p < 0.94 else [1, 1]
elif actions[0] == 0 and actions[1] == 1:
return [1, 0] if p < 0.9 else [0, 0] if p < 0.92 else [1, 1] if p < 0.94 else [0, 1]
elif actions[0] == 1 and actions[1] == 0:
return [0, 1] if p < 0.9 else [1, 1] if p < 0.92 else [0, 0] if p < 0.94 else [1, 0]
elif actions[0] == 1 and actions[1] == 1:
return [1, 1] if p < 0.9 else [1, 0] if p < 0.92 else [0, 1] if p < 0.94 else [0, 0]
else:
raise ValueError
strategies = [Strategy1, Strategy2, Strategy3, Strategy4, Strategy5,
Strategy6, Strategy7, Strategy8, Strategy9, Strategy10,
Strategy11, Strategy12, Strategy13, Strategy14, Strategy15,
Strategy16, Strategy17, Strategy18, Strategy19, Strategy20,
Strategy21, Strategy22, Strategy23, Strategy24,
Iida_iprm, KatoStrategy, Self_Centered_private, ImPrivStrategy,
GrimTrigger, MyStrategy, beeleb, OyamaImperfectPrivateMonitoring, ogawa, yamagishi]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
| Datetime | 2015-11-30-18-01-45 | |||||
|---|---|---|---|---|---|---|
| Monitoring type | private | |||||
| RandomSeed | 282 | |||||
| Repeats | 1000 | |||||
| Average ts_length | 32.856 | |||||
| Number of strategies | 34 | |||||
| Str_numbers | Strategy name | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | 備考 |
| 27 | ikegami_imperfect_private.Self_Centered_private | 3.36832832 | 1 | 3.220361024 | 1 | 20% |
| 28 | mhanami_Imperfect_Private_Strategy.ImPrivStrategy | 3.348553889 | 2 | 3.216393297 | 2 | 2T2FT |
| 25 | Iida_imperfect_private.Iida_iprm | 3.32968985 | 3 | 3.15335652 | 5 | |
| 18 | kandori.Strategy18 | 3.292214224 | 4 | 3.084777652 | 15 | WSLS' |
| 17 | kandori.Strategy17 | 3.283194443 | 5 | 3.11118305 | 8 | TFT' |
| 4 | kandori.Strategy4 | 3.282928401 | 6 | 3.158213803 | 4 | |
| 19 | kandori.Strategy19 | 3.277143381 | 7 | 3.106785994 | 10 | TFT |
| 34 | yamagishi_impd.yamagishi | 3.277143381 | 8 | 3.106785994 | 11 | TFT |
| 30 | gistfile1.MyStrategy | 3.266900169 | 9 | 3.116454907 | 7 | TFT' |
| 29 | tsuyoshi.GrimTrigger | 3.26353101 | 10 | 3.085325036 | 14 | TFT' |
| 1 | kandori.Strategy1 | 3.261856949 | 11 | 3.076891541 | 17 | |
| 23 | kandori.Strategy23 | 3.261667595 | 12 | 3.107009651 | 9 | |
| 11 | kandori.Strategy11 | 3.260228791 | 13 | 3.098884204 | 12 | TFT' |
| 32 | oyama.OyamaImperfectPrivateMonitoring | 3.255871528 | 14 | 3.087366082 | 13 | TFT' |
| 14 | kandori.Strategy14 | 3.25337761 | 15 | 3.058440504 | 18 | WSLS' |
| 33 | ogawa.ogawa | 3.247716335 | 16 | 3.083111982 | 16 | |
| 31 | beeleb_Strategy.beeleb | 3.245998144 | 17 | 3.117079764 | 6 | |
| 8 | kandori.Strategy8 | 3.239536248 | 18 | 3.180036763 | 3 | HIST |
| 22 | kandori.Strategy22 | 3.231422435 | 19 | 3.033872324 | 20 | |
| 6 | kandori.Strategy6 | 3.225318551 | 20 | 3.030270289 | 22 | WSLS' |
| 7 | kandori.Strategy7 | 3.225300065 | 21 | 3.037891239 | 19 | TFT' |
| 13 | kandori.Strategy13 | 3.223658906 | 22 | 3.020115324 | 24 | CCDDDD |
| 21 | kandori.Strategy21 | 3.216136749 | 23 | 2.999892552 | 25 | WSLS' |
| 10 | kandori.Strategy10 | 3.214150715 | 24 | 3.022255517 | 23 | TFT' |
| 3 | kandori.Strategy3 | 3.207011964 | 25 | 2.984919502 | 27 | WSLS' |
| 26 | kato.KatoStrategy | 3.193996582 | 26 | 3.032555744 | 21 | |
| 16 | kandori.Strategy16 | 3.186837934 | 27 | 2.962482753 | 29 | WSLS |
| 2 | kandori.Strategy2 | 3.186837934 | 28 | 2.962482753 | 30 | WSLS |
| 12 | kandori.Strategy12 | 3.182024045 | 29 | 2.98471429 | 28 | |
| 5 | kandori.Strategy5 | 3.151694623 | 30 | 2.947040253 | 31 | |
| 20 | kandori.Strategy20 | 3.149316992 | 31 | 2.918115136 | 32 | WSLS' |
| 9 | kandori.Strategy9 | 3.131695859 | 32 | 2.986149848 | 26 | STFT |
| 15 | kandori.Strategy15 | 3.118259636 | 33 | 2.886726561 | 33 | WSLS' |
| 24 | kandori.Strategy24 | 3.017841689 | 34 | 2.754503121 | 34 | |
In [21]:
rounds = 1000 * 2
strategies = 34
max_ts = 100
# 読み込み
df = pd.read_csv('./contest5/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# 行: プレイヤー, 列: 1000*2セッション分の平均利得
average_matrix = np.zeros((rounds*(strategies-1), strategies), dtype=float)
for s in range(1, strategies+1):
for i, opponent in enumerate(df[str(s)].columns.values):
average_matrix[i*rounds:(i+1)*rounds, s-1] = df[str(s)][str(opponent)]
averages = np.zeros(strategies, dtype=float)
stds = np.zeros(strategies, dtype=float)
ranking = np.zeros(strategies, dtype=int)
for i in range(strategies):
averages[i] = average_matrix[:, i].mean()
stds[i] = average_matrix[:, i].std()
ranking = np.argsort(averages)[::-1]+1
fig, ax = plt.subplots(figsize=(28, 12))
bp = ax.boxplot(average_matrix, 0, '')
plt.grid()
plt.xlabel('戦略番号')
plt.ylabel('1セッションの平均利得')
ax.set_xlim([0, strategies+0.5])
ax.set_ylim([-0.1, 5.8])
plt.title('戦略別, 全セッションの平均利得の分布')
ax.text(0.1, 5.3, "ranking\nave\nstd", ha = 'center', va = 'center', color="black", size=14)
for i in range(strategies):
ax.text(i+1, 5.3, "{0:.0f}\n{1:.3f}\n{2:.3f}"
.format(np.where(ranking == i+1)[0][0]+1, averages[i], stds[i]), ha = 'center', va = 'center', color="black", size=14)
plt.show()
| str number | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| rank | 11 | 28 | 25 | 6 | 30 | 20 | 21 | 18 | 32 | 24 | 13 | 29 |
| count | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 |
| mean | 3.261857 | 3.186838 | 3.207012 | 3.282928 | 3.151695 | 3.225319 | 3.2253 | 3.239536 | 3.131696 | 3.214151 | 3.260229 | 3.182024 |
| std | 0.820985 | 0.901844 | 0.871601 | 0.838379 | 0.971843 | 0.941597 | 0.764253 | 0.904268 | 0.847703 | 0.771735 | 0.815696 | 0.743275 |
| min | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 25% | 2.67681 | 2.627406 | 2.638741 | 2.736842 | 2.7 | 2.863481 | 2.650262 | 2.701754 | 2.407407 | 2.638889 | 2.666667 | 2.6 |
| 50% | 3.535401 | 3.514286 | 3.52 | 3.6 | 3.375 | 3.56697 | 3.309091 | 3.578947 | 3 | 3.285714 | 3.51835 | 3.16 |
| 75% | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 3.894737 | 4 | 4 | 3.641026 |
| max | 4.923077 | 4.916667 | 4.909091 | 4.166667 | 4.25 | 4.777778 | 4.75 | 4.111111 | 5 | 4.75 | 4.333333 | 4.818182 |
| str number | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| rank | 22 | 15 | 33 | 27 | 5 | 4 | 7 | 31 | 23 | 19 | 12 | 34 |
| count | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 |
| mean | 3.223659 | 3.253378 | 3.11826 | 3.186838 | 3.283194 | 3.292214 | 3.277143 | 3.149317 | 3.216137 | 3.231422 | 3.261668 | 3.017842 |
| std | 0.734062 | 0.861139 | 0.900098 | 0.901844 | 0.741322 | 0.766913 | 0.738413 | 0.90132 | 0.886843 | 0.927439 | 0.946034 | 1.013924 |
| min | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 25% | 2.583333 | 2.833333 | 2.585366 | 2.627406 | 2.714286 | 2.72093 | 2.681818 | 2.6 | 2.703019 | 2.861538 | 2.918919 | 2.333333 |
| 50% | 3.333333 | 3.555556 | 3.333333 | 3.514286 | 3.457143 | 3.520833 | 3.454545 | 3.416667 | 3.538462 | 3.578947 | 3.627451 | 3.153846 |
| 75% | 3.85 | 4 | 3.857143 | 4 | 4 | 4 | 4 | 3.916667 | 4 | 4 | 4 | 3.666667 |
| max | 4.894737 | 4.916667 | 4.909091 | 4.916667 | 4.4 | 4.923077 | 4.666667 | 4.916667 | 4.923077 | 4.933333 | 4.2 | 5 |
| str number | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
|---|---|---|---|---|---|---|---|---|---|---|
| rank | 3 | 26 | 1 | 2 | 10 | 9 | 17 | 14 | 16 | 8 |
| count | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 | 66000 |
| mean | 3.32969 | 3.193997 | 3.368328 | 3.348554 | 3.263531 | 3.2669 | 3.245998 | 3.255872 | 3.247716 | 3.277143 |
| std | 0.746131 | 0.760182 | 0.690123 | 0.700335 | 0.857724 | 0.879561 | 0.856646 | 0.793313 | 0.779358 | 0.738413 |
| min | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 25% | 2.9 | 2.577778 | 2.829268 | 2.888889 | 2.857143 | 2.75 | 2.608696 | 2.692308 | 2.680851 | 2.681818 |
| 50% | 3.458333 | 3.214286 | 3.531915 | 3.507692 | 3.555556 | 3.604167 | 3.58209 | 3.47619 | 3.41791 | 3.454545 |
| 75% | 4 | 3.794118 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
| max | 4.9 | 4.8 | 4.923077 | 4.4 | 4.923077 | 4.25 | 4.1 | 4.9 | 4.5 | 4.666667 |
In [22]:
rounds = 1000 * 2
strategies = 34
max_ts = 100
# 読み込み
df = pd.read_csv('./contest5/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜100期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
fig, ax = plt.subplots(figsize=(20, 10))
plt.title("average payoff trend")
plt.xlabel("ts_length")
plt.ylabel("average payoff")
t_list = [i for i in range(1, max_ts+1)]
for s in range(1, strategies+1):
if s in [27, 28, 18, 13, 9, 8]:
pass
else:
average_list = average_matrix[s-1]
plt.plot(t_list, average_list, color='#bbbbbb')
plt.plot(t_list, average_matrix[27-1], color='red', linewidth=2, label="27 (20%)")
plt.plot(t_list, average_matrix[28-1], color='blue', linewidth=2, label="28 (2T2FT)")
plt.plot(t_list, average_matrix[19-1], color='magenta', linewidth=2, label="19 (TFT)")
plt.plot(t_list, average_matrix[18-1], color='green', linewidth=2, label="18 (WSLS’)")
plt.plot(t_list, average_matrix[13-1], color='purple', linewidth=2, label="13 (CCDDDD)")
plt.plot(t_list, average_matrix[9-1], color='brown', linewidth=2, label="9 (STFT)")
plt.plot(t_list, average_matrix[8-1], color='orange', linewidth=2, label="8 (HIST)")
plt.legend()
plt.show()
In [23]:
def trim_mean(ts_length, aves, width):
size = ts_length.size
hist = {}
for t in ts_length:
hist[t] = hist.get(t, 0) + 1
lower_b = size * (1-width) / 2
upper_b = size * (1 - (1-width)/2)
s = 0
total = 0
for ts, num in sorted(hist.items()):
old_s = s
s += num
if old_s <= lower_b < s:
total += (s-lower_b) * aves[ts-1]
elif old_s <= upper_b < s:
total += (upper_b-old_s+1) * aves[ts-1]
elif lower_b <= s <= upper_b:
total += num * aves[ts-1]
elif s > upper_b:
break
return total / (size * width)
rounds = 1000 * 2
strategies = 34
max_ts = ts_length.max()
# 読み込み
df = pd.read_csv('./contest5/data/set_result.csv', index_col=[0, 1], header=[0, 1])
# ts_lengthの長い順に並び替え
ordered_df = df.sortlevel(level="ts_length")
# 行: プレイヤー, 列: ts_lengthが1〜max期の時の平均利得
average_matrix = np.zeros((strategies, max_ts), dtype=float)
for t in range(1, max_ts+1):
df_t = df.iloc[df.index.get_level_values('ts_length') == t]
for s in range(1, strategies+1):
average = df_t[str(s)].mean().mean()
average_matrix[s-1, t-1] = average
for i in range(strategies):
print(trim_mean(ts_length, average_matrix[i], 0.9))
| Str_numbers | Avarage(session based) | Rank(session based) | Avarage(stage based) | Rank(stage based) | Average(90% trimmed) | Rank(trimmed) | 備考 |
|---|---|---|---|---|---|---|---|
| 27 | 3.368 | 1 | 3.220 | 1 | 3.366 | 1 | 20% |
| 28 | 3.349 | 2 | 3.216 | 2 | 3.346 | 2 | 2T2FT |
| 25 | 3.330 | 3 | 3.153 | 5 | 3.331 | 3 | |
| 18 | 3.292 | 4 | 3.085 | 15 | 3.292 | 4 | WSLS' |
| 17 | 3.283 | 5 | 3.111 | 8 | 3.282 | 5 | TFT' |
| 4 | 3.283 | 6 | 3.158 | 4 | 3.276 | 6 | |
| 19 | 3.277 | 7 | 3.107 | 10 | 3.272 | 8 | TFT |
| 34 | 3.277 | 8 | 3.107 | 11 | 3.272 | 7 | TFT |
| 30 | 3.267 | 9 | 3.116 | 7 | 3.261 | 9 | 2TFT' |
| 29 | 3.264 | 10 | 3.085 | 14 | 3.261 | 10 | TFT' |
| 1 | 3.262 | 11 | 3.077 | 17 | 3.258 | 11 | |
| 23 | 3.262 | 12 | 3.107 | 9 | 3.257 | 12 | |
| 11 | 3.260 | 13 | 3.099 | 12 | 3.256 | 13 | TFT' |
| 32 | 3.256 | 14 | 3.087 | 13 | 3.252 | 15 | TFT' |
| 14 | 3.253 | 15 | 3.058 | 18 | 3.252 | 14 | WSLS' |
| 33 | 3.248 | 16 | 3.083 | 16 | 3.243 | 16 | |
| 31 | 3.246 | 17 | 3.117 | 6 | 3.237 | 17 | |
| 8 | 3.240 | 18 | 3.180 | 3 | 3.223 | 20 | HIST |
| 22 | 3.231 | 19 | 3.034 | 20 | 3.227 | 18 | |
| 6 | 3.225 | 20 | 3.030 | 22 | 3.223 | 19 | WSLS' |
| 7 | 3.225 | 21 | 3.038 | 19 | 3.218 | 22 | TFT' |
| 13 | 3.224 | 22 | 3.020 | 24 | 3.219 | 21 | CCDDDD |
| 21 | 3.216 | 23 | 3.000 | 25 | 3.214 | 23 | WSLS' |
| 10 | 3.214 | 24 | 3.022 | 23 | 3.207 | 24 | TFT' |
| 3 | 3.207 | 25 | 2.985 | 27 | 3.203 | 25 | WSLS' |
| 26 | 3.194 | 26 | 3.033 | 21 | 3.173 | 28 | |
| 16 | 3.187 | 27 | 2.962 | 29 | 3.182 | 27 | WSLS |
| 2 | 3.187 | 28 | 2.962 | 30 | 3.182 | 26 | WSLS |
| 12 | 3.182 | 29 | 2.985 | 28 | 3.162 | 29 | |
| 5 | 3.152 | 30 | 2.947 | 31 | 3.146 | 30 | |
| 20 | 3.149 | 31 | 2.918 | 32 | 3.141 | 31 | WSLS' |
| 9 | 3.132 | 32 | 2.986 | 26 | 3.076 | 33 | STFT |
| 15 | 3.118 | 33 | 2.887 | 33 | 3.106 | 32 | WSLS' |
| 24 | 3.018 | 34 | 2.755 | 34 | 2.954 | 34 |
セッション平均とほぼ同じ。
In [24]:
class TFT(object):
def __init__(self, random_state=None):
if random_state is None:
random_state = np.random.RandomState()
self.random_state = random_state
self.signal = 0
def play(self):
return self.signal
def get_signal(self, signal):
self.signal = signal
class WSLS(object):
def __init__(self, random_state=None):
if random_state is None:
random_state = np.random.RandomState()
self.random_state = random_state
self.my_action = 0
self.signal = 0
def play(self):
if self.signal == 1:
self.my_action = 1 - self.my_action
return self.my_action
else:
return self.my_action
def get_signal(self, signal):
self.signal = signal
# 「相手の」シグナルが協調か攻撃かを(ノイズ付きで)返す
def private_signal(actions, random_state):
pattern = [[0, 0], [0, 1], [1, 0], [1, 1]]
# 例えば実際の行動が(0, 1)なら、シグナルは(1, 0)である可能性が最も高い
signal_probs = [[.9, .02, .02, .06], [.02, .06, .9, .02], [.02, .9, .06, .02], [.06, .02, .02, .9]]
p = random_state.uniform()
if actions[0] == 0 and actions[1] == 0:
return [0, 0] if p < 0.9 else [0, 1] if p < 0.92 else [1, 0] if p < 0.94 else [1, 1]
elif actions[0] == 0 and actions[1] == 1:
return [1, 0] if p < 0.9 else [0, 0] if p < 0.92 else [1, 1] if p < 0.94 else [0, 1]
elif actions[0] == 1 and actions[1] == 0:
return [0, 1] if p < 0.9 else [1, 1] if p < 0.92 else [0, 0] if p < 0.94 else [1, 0]
elif actions[0] == 1 and actions[1] == 1:
return [1, 1] if p < 0.9 else [1, 0] if p < 0.92 else [0, 1] if p < 0.94 else [0, 0]
else:
raise ValueError
strategies = [TFT, TFT, WSLS, WSLS]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
スコアテーブル(セッション平均)
| TFT | TFT | WSLS | WSLS | |
|---|---|---|---|---|
| TFT | 0 | 3.225 | 3.241 | 3.241 |
| TFT | 3.225 | 0 | 3.241 | 3.241 |
| WSLS | 3.218 | 3.218 | 0 | 3.783 |
| WSLS | 3.218 | 3.218 | 3.783 | 0 |
となって、WSLSの方が平均利得が高くなる。なぜ?
TFT:
WSLS:
(∵) WSLS, TFTともに、誤ったシグナルが伝わらない限りは協調を続ける。対戦を、
の3つに分類する。
TFT vs TFT
両方同時に間違ったシグナルが出た場合、次期以降は(次に誤ったシグナルが出るまで)両者ともずっとDを出し続ける。
片方だけに間違ったシグナルが出た場合、次期以降は両者が交互にCとDを出す。
WSLS vs TFT
両方同時に誤ったシグナルが出た場合、次期以降、TFTはD→D→C→D→D→C→……、WSLSはD→C→D→D→C→D→…… という均衡になる。
片方だけに間違ったシグナルが出た場合も、同様のパターンに落ち着く。
WSLS vs WSLS
両方同時に間違ったシグナルが出た場合、双方1回裏切りの後、再び協調に戻る。
片方だけに間違ったシグナルが出た場合は、(C ,D)→(D, D)→(C, C)となって、ふたたび協調に戻る。
つまり、 WSLS vs WSLSはシグナルの間違いに強く同戦略同士での協調がしやすい ため、その両者の対戦のスコアが平均利得を押し上げたと考えられる。 実際、スコアテーブルを見れば、3の対戦だけが突出して平均利得が高くなっている(1と2のパターンの対戦スコアはほぼ同じ)
In [25]:
class ALLD(object):
def __init__(self, random_state=None):
if random_state is None:
random_state = np.random.RandomState()
self.random_state = random_state
def play(self):
return 1
def get_signal(self, signal):
pass
strategies = [TFT, TFT, WSLS, WSLS, ALLD]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
スコアテーブル:
| TFT | TFT | WSLS | WSLS | ALLD | |
|---|---|---|---|---|---|
| TFT | 0 | 3.225 | 3.241 | 3.241 | 1.625 |
| TFT | 3.225 | 0 | 3.241 | 3.241 | 1.625 |
| WSLS | 3.218 | 3.218 | 0 | 3.783 | 0.932 |
| WSLS | 3.218 | 3.218 | 3.783 | 0 | 0.932 |
| ALLD | 2.562 | 2.562 | 3.602 | 3.602 | 0 |
TFT, WSLSとALLDの対戦を考える。
TFT vs ALLD
誤ったシグナルが出されないかぎり、TFTはC→D→D→D→……となる。
誤ったシグナル(良いシグナル)がTFTに対して出た場合、TFTは1期協調した後、再び攻撃に戻る。結果、TFTの方がALLDよりもいくらか利得が少なくなる。
WSLS vs ALLD
WSLSは誤ったシグナルが出ないかぎり、ずっとC→D→C→D→……を繰り返す。したがって、WSLSはALLDに大きく利得を吸い取られる。
ALLDがWSLSの大きな弱点となっているため、WSLSとTFTだけのケースに比べ、上のゲームでは相対的にTFTの順位が高くなっている。
In [26]:
strategies = [TFT, TFT, TFT, TFT, TFT, TFT, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, ALLD]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
WSLS: セッション平均: 3.257
ALLD: セッション平均: 3.046
TFT: セッション平均: 2.895
となった。これは、WSLS同士の対戦で得られる高い利得 > WSLSがALLDに吸い取られる利得 となったため。
スコアテーブルを戦略のタイプ別に集計すると、
| タイプ別平均 | ||||||
|---|---|---|---|---|---|---|
| WSLS | TFT | ALLD | Other kandori | total average | ||
| WSLS | 3.121197699 | 3.1719659 | 1.597288599 | 3.241082345 | 3.230509666 | |
| TFT | 3.156388186 | 2.745320385 | 2.102810448 | 3.289310268 | 3.16691979 | |
| ALLD | 3.568115816 | 2.766558346 | 3.474086488 | 3.326308014 | ||
| Other kandori | 2.843888455 | 2.874368906 | 1.404262478 | 2.482345208 | 2.773649588 | |
となった。神取ゼミのWSLS, TFT, ALLD以外の8戦略(Other kandori)は3戦略にそれほど大きな影響を与えていないことがわかる。したがって、3タイプだけで元実験を近似できている。
一般に、WSLSが多く、ALLDが少ない環境では、WSLSは高い利得を得られる。
特に戦略18は、通常のWSLSに比べてALLDに強く、1位になった要因だと考えられる。
Strategy18:
実験4のケースと同様に、TFT×11, WSLS×9, ALLD×1 で実験してみると、
In [27]:
strategies = [TFT, TFT, TFT, TFT, TFT, TFT, TFT, TFT, TFT, TFT, TFT, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, WSLS, ALLD]
game = pl.RepeatedMatrixGame(payoff, strategies, signal=private_signal, ts_length=ts_length, repeat=1000)
game.play(mtype="private", random_seed=seed, record=False)
WSLS: セッション平均 3.330
TFT: セッション平均 2.913
ALLD: セッション平均 2.867
となり、実験5とは異なる結果となった。
| タイプ別セッション平均 | ||||||
|---|---|---|---|---|---|---|
| WSLS | TFT | ALLD | Other kandori | Other oyama | total average | |
| WSLS | 3.121197699 | 3.294184513 | 1.597288599 | 3.241082345 | 2.844703858 | 3.20392351 |
| TFT | 3.217917263 | 3.135976169 | 1.999373767 | 3.280819084 | 3.198661754 | 3.254883021 |
| ALLD | 3.568115816 | 2.947953446 | 3.474086488 | 2.80950435 | 3.223658906 | |
| Other kandori | 3.281705328 | 3.384906732 | 1.61449243 | 2.850495713 | 2.977608253 | 3.203621498 |
| Other oyama | 3.290522046 | 3.369423514 | 2.085169256 | 3.355591673 | 2.611990254 | 3.277145846 |
スコアテーブルを戦略のタイプごとに集計し直すと、実験5で「WSLSの利得 < TFTの利得」となった要因は、尾山ゼミのWSLS, TFT, ALLDでない残りの5戦略が原因であることがわかる。
Prob :=「過去全ての履歴の内◯◯%以上BならDを出す戦略」は、どのモニタリングタイプの実験でも安定して高順位。なぜ?
TFT:
WSLS:
各プレイヤーの戦略がTFT, WSLS, ALLD, Probのみの場合を考える。
Prob vs TFT
間違ったシグナルが出ない限り、協調を続ける。
Probだけに間違ったシグナルが出た場合、現在の期数によっては、両者ともDを出し続ける経路へ移る。
TFTだけに間違ったシグナルが出た場合, 両者ともに間違ったシグナルが出た場合もほぼ同様。
Prob vs WSLS
間違ったシグナルが出ないかぎり、協調を続ける。
Probに間違ったシグナルが出た場合、現在の期数によっては、ProbはDを出し続ける経路へ移る。この時、WSLSはCとDを交互に出す経路に行くので、ProbはWSLSから多くの利得を奪うことが出来る(ALLDがWSLSに対して有利なのと同様)。
更にWSLSのみ、あるいは両者に間違ったシグナルが出た場合、現在の期数によらず、しばらくWSLSがDを出し続けた後、上述の均衡へ必ず移行する。
Prob vs ALLD
両者ともほぼずっとDを出し続ける均衡で落ち着く。
Prob vs Prob
間違ったシグナルが出ないかぎり、協調を続ける。
早い期で間違ったシグナルが出た場合は両者Dを出しあう均衡へ移行する。そうでない場合はCを出し続ける。
つまり、ProbはTFT, ALLDおよびProb同士との対戦ではほぼ同等の利得を得、さらにWSLSとの対戦では多くの利得を得ることが出来る。したがって、各プレイヤーの戦略がこの4つのタイプのみに分類される場合、Probは(Dに移行する確率が何%であれ)悪くない戦略だと考えられる。ただし実際にどの戦略が1位になるかは、全ての戦略に占める各タイプの割合による。
| タイプ別平均 | |||||||
|---|---|---|---|---|---|---|---|
| WSLS | TFT | ALLD | Prob | Other kandori | Other oyama | total average | |
| WSLS | 3.121197699 | 3.294184513 | 1.597288599 | 2.559611894 | 3.241082345 | 2.915976849 | 3.20392351 |
| TFT | 3.217917263 | 3.135976169 | 1.999373767 | 2.99366014 | 3.280819084 | 3.249912158 | 3.254883021 |
| ALLD | 3.568115816 | 2.947953446 | 2.536744069 | 3.474086488 | 2.87769442 | 3.223658906 | |
| Prob | 3.553529215 | 3.278944542 | 2.286598783 | 3.513118831 | 3.178283059 | 3.36832832 | |
| Other kandori | 3.281705328 | 3.384906732 | 1.61449243 | 2.727741801 | 2.850495713 | 3.040074867 | 3.203621498 |
| Other oyama | 3.224770254 | 3.392043256 | 2.034811875 | 2.927390992 | 3.316209883 | 2.55100466 | 3.254350228 |
実験5のスコアテーブルを再度タイプ別に集計すると、ProbはTFT, ALLDとの対戦でそれなりの利得を得、WSLSに対してはALLDなみに高い利得をえていることがわかる。
In [ ]: