In [1]:
%matplotlib inline
import numpy as np
import pylab as pl
import math
from sympy import *
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
In [62]:
import numpy as np
import pylab as pl
import math
from sympy import *
class RNN:
    """Three-layer network with hidden-state feedback, trained sample by sample.

    The input vector holds the external inputs followed by the hidden
    activations left over from the previous step; feeding those activations
    back in is what makes the network recurrent.

    Weights are stored as ``w_ji`` (input -> hidden, shape
    ``(n_input, n_hidden)``) and ``w_kj`` (hidden -> output, shape
    ``(n_hidden, n_output)``).

    Typical use: call ``set_network()`` once, then ``main2()`` to train on a
    history and ``realize()`` to read a prediction.

    The class relies on ``np``, ``math`` and the sympy names ``Symbol``,
    ``exp`` and ``diff`` being available at module level.
    """

    def __init__(self):
        """Create an empty network; layers are allocated by the set_* methods."""
        pass

    def set_input_layer(self, x):
        """Allocate a zeroed input layer of ``x`` units and return it.

        ``x`` must count the external inputs plus the hidden units, because
        the hidden activations are copied into the tail of this vector.
        """
        self.input_layer = np.zeros(x)
        return self.input_layer

    def set_output_layer(self, x):
        """Allocate the output layer, its pre-activations and the target vector.

        Returns the tuple ``(output_layer, before_output_layer, supervised_data)``,
        each a zeroed array of length ``x``.
        """
        self.output_layer = np.zeros(x)
        self.before_output_layer = np.zeros(x)
        self.supervised_data = np.zeros(x)
        return self.output_layer, self.before_output_layer, self.supervised_data

    def set_hidden_layer(self, x):
        """Allocate the hidden layer and its pre-activations (length ``x``).

        Returns the tuple ``(hidden_layer, before_hidden_layer)``.
        """
        self.hidden_layer = np.zeros(x)
        self.before_hidden_layer = np.zeros(x)
        return self.hidden_layer, self.before_hidden_layer

    def setup(self):
        """Allocate zeroed weight matrices sized from the current layers.

        Must be called after the three ``set_*_layer`` methods.
        Returns the tuple ``(w_kj, w_ji)``.
        """
        n_in = len(self.input_layer)
        n_hid = len(self.hidden_layer)
        n_out = len(self.output_layer)
        self.w_kj = np.zeros((n_hid, n_out))
        self.w_ji = np.zeros((n_in, n_hid))
        return self.w_kj, self.w_ji

    def initialize(self, hidden=None):
        """Randomise both weight matrices and choose the activation function.

        ``hidden`` is an optional sympy expression used as the activation;
        when omitted the logistic function ``1/(1+exp(-u))`` is used.  The
        expression and its derivative are stored as ``hfunction`` and
        ``diff_hf``.  The forward pass substitutes for the symbol ``u``.
        """
        n_in = len(self.input_layer)
        n_hid = len(self.hidden_layer)
        # Hidden->output weights are drawn first, then input->hidden, each in
        # row-major order, so the draw sequence matches an element-by-element
        # fill of the same matrices.
        for weights, fan in ((self.w_kj, n_hid), (self.w_ji, n_in)):
            if weights.size == 0:
                continue
            # NOTE(review): 1/sqrt(1/n) simplifies to sqrt(n), so the range
            # widens with layer size.  A bound of 1/sqrt(n) may have been
            # intended; the original value is kept here.
            bound = 1.0 / math.sqrt(1.0 / fan)
            weights[...] = np.random.uniform(-bound, bound, size=weights.shape)
        if hidden is None:
            u = Symbol('u')
            hidden = 1 / (1 + exp(-u))
        self.hfunction = hidden
        self.diff_hf = diff(self.hfunction)

    def supervised_function(self, sdata):
        """Copy the leading entries of ``sdata`` into the target vector."""
        for i in range(len(self.supervised_data)):
            self.supervised_data[i] = sdata[i]

    def set_hidden_error(self, j):
        """Return the output error propagated back to hidden unit ``j``.

        Sums, over every output unit, the weight from hidden unit ``j`` times
        the output error times the activation derivative evaluated at that
        output's pre-activation.  The result is whatever the sympy arithmetic
        yields, so callers that need a plain number should wrap it in ``float``.
        """
        u = Symbol("u")
        hidden_error = 0
        for k in range(len(self.output_layer)):
            slope = self.diff_hf.subs([(u, self.before_output_layer[k])])
            residual = self.supervised_data[k] - self.output_layer[k]
            hidden_error += self.w_kj[j][k] * residual * slope
        return hidden_error

    def calculation(self):
        """Run one forward pass from ``input_layer`` to ``output_layer``.

        Pre-activations are kept as 1-D arrays, the same shape the
        ``set_*_layer`` methods allocate, and each product is computed once.
        """
        u = Symbol("u")
        activation = self.hfunction
        self.before_hidden_layer = np.dot(np.transpose(self.w_ji), self.input_layer)
        for j in range(len(self.hidden_layer)):
            value = float(self.before_hidden_layer[j])
            self.hidden_layer[j] = float(activation.subs([(u, value)]))
        self.before_output_layer = np.dot(np.transpose(self.w_kj), self.hidden_layer)
        for k in range(len(self.output_layer)):
            value = float(self.before_output_layer[k])
            self.output_layer[k] = float(activation.subs([(u, value)]))

    def output_ad(self):
        """Apply one gradient step to the hidden->output weights ``w_kj``.

        The slope term ``y*(1-y)`` is the logistic derivative written in terms
        of the activation value, so a custom ``hidden`` function passed to
        ``initialize`` is not reflected here.
        """
        residual = self.supervised_data - self.output_layer
        slope = self.output_layer * (1 - self.output_layer)
        self.w_kj += np.outer(self.hidden_layer, self.eta * residual * slope)

    def input_ad(self):
        """Apply one gradient step to the input->hidden weights ``w_ji``.

        The back-propagated error depends only on the hidden unit, so it is
        computed once per hidden unit rather than once per weight.
        """
        eta = self.eta
        for j in range(len(self.hidden_layer)):
            hidden_error = float(self.set_hidden_error(j))
            delta_y = self.hidden_layer[j] * (1 - self.hidden_layer[j])
            scale = eta * hidden_error * delta_y
            for i in range(len(self.input_layer)):
                self.w_ji[i][j] += scale * self.input_layer[i]

    def _load_inputs(self, sample):
        """Fill ``input_layer`` with ``sample`` followed by the hidden state."""
        n_ext = len(sample)
        for j in range(n_ext):
            self.input_layer[j] = sample[j]
        # The hidden activations start right after the external inputs, so
        # none of the external inputs is overwritten.
        for j in range(len(self.hidden_layer)):
            self.input_layer[n_ext + j] = self.hidden_layer[j]

    def simulate(self, idata, sdata, eta):
        """Train once over a history, one sample at a time, in order.

        ``idata[t]`` is the external input at step ``t`` and ``sdata[t]`` the
        target for that same step.  For each step the network does a forward
        pass, updates ``w_kj``, does another forward pass, then updates
        ``w_ji``.  The hidden state is carried over between steps and between
        calls.  Returns ``output_layer`` as left by the last step.
        """
        self.eta = eta
        self.thidden = np.array([])
        self.toutput = np.array([])
        for step, sample in enumerate(idata):
            self.supervised_function(sdata[step])
            self._load_inputs(sample)
            self.calculation()
            self.output_ad()
            self.calculation()
            self.input_ad()
        return self.output_layer

    def main(self, idata, sdata, eta, N, i=2, h=2, o=1):
        """Build and initialise a network; return its first (untrained) output.

        NOTE(review): ``idata``, ``sdata``, ``eta`` and ``N`` are accepted but
        not used; no training happens here.  Use ``main2`` to train.
        """
        self.set_network(i, h, o)
        return self.output_layer[0]

    def set_network(self, i=2, h=5, o=1):
        """Allocate layers for ``i`` external inputs, ``h`` hidden units and
        ``o`` outputs, then create and randomise the weights."""
        self.set_input_layer(i + h)
        self.set_hidden_layer(h)
        self.set_output_layer(o)
        self.setup()
        self.initialize()

    def main2(self, idata, sdata, eta):
        """Train once over the history and return the first output unit."""
        self.simulate(idata, sdata, eta)
        return self.output_layer[0]

    def realize(self, idata):
        """Feed the history forward without training; return the first output.

        The hidden state is updated as a side effect of every step.
        """
        for sample in idata:
            self._load_inputs(sample)
            self.calculation()
        return self.output_layer[0]
ゲーム理論で用いられるTit for Tatを再現してみる。二人のプレーヤーが互いにRNNで相手の行動を予測し、相手の行動に対してTit for Tatに基づいた行動を選択する。
最初の行動はRNNで指定できないので、所与となる。この初期値と裏切りに対する感応度で収束の仕方が決まる。
協調を1、裏切りを0としている。RNNの出力値は整数値ではないため、これを確率とみなし、確率 p=(RNNの出力値) で次回に協調を行うものとする。
例1:1期目に、プレーヤー1が協力、プレーヤー2が裏切り。
In [109]:
# Example 1: two networks, each trained to predict the other player's action.
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()
# idata1 rows are [player 1 action, player 2 action]; idata2 rows hold the
# same pair swapped.  sdata1 / sdata2 are the action sequences appended to
# below (1 = cooperate, 0 = defect).
idata1 = [[1, 0]]
idata2 = [[0, 1]]
sdata1 = [[0]]
sdata2 = [[1]]
for t in range(20):
    # Ten training passes over the history so far; nn1 is trained against
    # player 2's actions and nn2 against player 1's.
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    idata1.append([sdata1[-1][0], sdata2[-1][0]])
    idata2.append([idata1[-1][1], idata1[-1][0]])
    # Each network is queried with the same history it is trained on.
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata2)
    # The network output is used as the probability of cooperating next.
    sdata1.append([np.random.choice([1, 0], p=[n1r, 1-n1r])])
    sdata2.append([np.random.choice([1, 0], p=[n2r, 1-n2r])])
# Only names defined in this cell are used, so the cell runs on a fresh kernel.
print("%s %s %s" % (nn1.realize(idata1), nn2.realize(idata2), idata1))
In [ ]:
下の図より、最初は交互に相手にしっぺ返しをしているが、やがて両者が裏切り合う状態に収束する。
In [111]:
# First column of each player's history, plotted as two lines.
p1 = [row[0] for row in idata1]
p2 = [row[0] for row in idata2]
plt.plot(p1)
plt.plot(p2)
Out[111]:
例2:1期目に、プレーヤー1が協力、プレーヤー2が協力。ただし、プレーヤー2は相手の裏切りをかなり警戒している。
警戒を表すためにp=(RNNの出力値 - 0.3)とする。p<0の場合はp=0に直す。
In [112]:
# Example 2: both players open with cooperation; player 2's cooperation
# probability is lowered by a fixed amount.
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()
# idata1 rows are [player 1 action, player 2 action]; idata2 rows hold the
# same pair swapped (1 = cooperate, 0 = defect).
idata1 = [[1, 1]]
idata2 = [[1, 1]]
sdata1 = [[1]]
sdata2 = [[1]]
for t in range(20):
    # Ten training passes over the history so far.
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    idata1.append([sdata1[-1][0], sdata2[-1][0]])
    idata2.append([idata1[-1][1], idata1[-1][0]])
    # Each network is queried with the same history it is trained on.
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata2)
    prob1 = n1r
    # Player 2 cooperates less readily: subtract 0.3 and clamp at zero so the
    # value stays a valid probability.
    prob2 = n2r - 0.3
    if prob2 < 0:
        prob2 = 0
    sdata1.append([np.random.choice([1, 0], p=[prob1, 1-prob1])])
    sdata2.append([np.random.choice([1, 0], p=[prob2, 1-prob2])])
# Only names defined in this cell are used, so the cell runs on a fresh kernel.
print("%s %s %s" % (nn1.realize(idata1), nn2.realize(idata2), idata1))
In [113]:
# First column of each player's history, plotted as two lines.
p1 = [row[0] for row in idata1]
p2 = [row[0] for row in idata2]
plt.plot(p1)
plt.plot(p2)
Out[113]:
例3:次に相手の行動を完全には観測できない場合を考える。t期の相手の行動をt+1期にノイズが加わって知る。例えば、1期目に相手が協調したことを、確率90%で2期目に正しく知れるが、10%で裏切りと誤って伝わる場合である。
ノイズは20%の確率で加わるものとする。その他の条件は例1と同じにした。
In [116]:
# Example 3: same opening as example 1, but each player observes the
# opponent's last action through a noisy channel.
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()
# idataN rows are [own action, observed opponent action] for player N
# (1 = cooperate, 0 = defect).
idata1 = [[1, 0]]
idata2 = [[0, 1]]
sdata1 = [[0]]
sdata2 = [[1]]
for t in range(20):
    # Ten training passes over the history so far.
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    # The opponent's action is recorded correctly with probability 0.8 and
    # flipped with probability 0.2; a player's own action is always exact.
    idata1.append([sdata1[-1][0], np.random.choice([sdata2[-1][0], 1-sdata2[-1][0]], p=[0.8, 0.2])])
    idata2.append([sdata2[-1][0], np.random.choice([sdata1[-1][0], 1-sdata1[-1][0]], p=[0.8, 0.2])])
    # Each network is queried with the same history it is trained on.
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata2)
    prob1 = n1r
    prob2 = n2r
    sdata1.append([np.random.choice([1, 0], p=[prob1, 1-prob1])])
    sdata2.append([np.random.choice([1, 0], p=[prob2, 1-prob2])])
# Only names defined in this cell are used, so the cell runs on a fresh kernel.
print("%s %s %s" % (nn1.realize(idata1), nn2.realize(idata2), idata1))
In [117]:
# First column of each player's history, plotted as two lines.
p1 = [row[0] for row in idata1]
p2 = [row[0] for row in idata2]
plt.plot(p1)
plt.plot(p2)
Out[117]:
In [88]:
# Show both action sequences; this form prints the same text under Python 2 and 3.
print("%s %s" % (sdata1, sdata2))
In [ ]:
In [78]:
# Scratch check: np.append without an axis flattens both arguments, so the
# nested one-element array and the appended value end up in one 1-D array.
j = np.append(np.array([np.array([0])]), np.array([1]))
In [79]:
# Display the array built in the previous cell.
j
Out[79]:
In [ ]: