In [1]:
%matplotlib inline

import numpy as np
import pylab as pl
import math 
from sympy import *
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D

In [62]:
import numpy as np
import pylab as pl
import math 
from sympy import *

class RNN:
    def __init__(self):
        pass
    
    def set_input_layer(self, x):
        self.input_layer = np.zeros(x)
        return self.input_layer
    
    def set_output_layer(self, x):
        self.output_layer = np.zeros(x)
        self.before_output_layer = np.zeros(x)
        self.supervised_data = np.zeros(x)
        return self.output_layer, self.before_output_layer, self.supervised_data
    
    def set_hidden_layer(self, x):
        self.hidden_layer = np.zeros(x)
        self.before_hidden_layer = np.zeros(x)
        return self.hidden_layer, self.before_hidden_layer
    
    def setup(self):
        w_k = np.zeros(len(self.output_layer))
        self.w_kj = np.array([w_k for i in range(len(self.hidden_layer))])
        w_j = np.zeros(len(self.hidden_layer))
        self.w_ji = np.array([w_j for i in range(len(self.input_layer))])
        return self.w_kj, self.w_ji
        
    def initialize(self, hidden=None):
        for i in range(len(self.hidden_layer)):
            for j in range(len(self.output_layer)):
                self.w_kj[i][j] = np.random.uniform(-1.0/math.sqrt(1.0/len(self.hidden_layer)), 1.0/math.sqrt(1.0/len(self.hidden_layer)))
            
        for i in range(len(self.input_layer)):
            for j in range(len(self.hidden_layer)):
                self.w_ji[i][j] = np.random.uniform(-1.0/math.sqrt(1.0/len(self.input_layer)), 1.0/math.sqrt(1.0/len(self.input_layer)))
     
        if hidden is None:
            u = Symbol('u')
            self.hfunction = 1/(1+exp(-u))
            self.diff_hf = diff(self.hfunction)
        else:
            self.hfunction = hidden
            self.diff_hf = diff(self.hfunction)
    
    def supervised_function(self, sdata):
        for i in range(len(self.supervised_data)):
            self.supervised_data[i] = sdata[i]
            
    def set_hidden_error(self, j):
        u = Symbol("u")
        diff_hf = self.diff_hf 
        hidden_error = 0
        for k in range(len(self.output_layer)):
            delta_z = diff_hf.subs([(u, self.before_output_layer[k])]) 
            hidden_error += self.w_kj[j][k]*(self.supervised_data[k] - self.output_layer[k])*delta_z
        return hidden_error
        
    def calculation(self):
        u = Symbol("u")
        hfunction = self.hfunction
        diff_hf = self.diff_hf
        
        for i in range(len(self.input_layer)):
            self.before_hidden_layer = np.matrix(self.w_ji).T*np.matrix(self.input_layer).T
            
        for i in range(len(self.hidden_layer)):
            self.hidden_layer[i] = hfunction.subs([(u, self.before_hidden_layer[i])])
            
        for i in range(len(self.before_output_layer)):
            self.before_output_layer = np.matrix(self.w_kj).T*np.matrix(self.hidden_layer).T
                                                   
        for i in range(len(self.output_layer)):
            self.output_layer[i] = hfunction.subs([(u, self.before_output_layer[i])]) 
                
    def output_ad(self):
        u = Symbol("u")
        hfunction = self.hfunction
        diff_hf = self.diff_hf 
        
        eta = self.eta
        for j in range(len(self.hidden_layer)):
            for k in range(len(self.output_layer)):
                delta_J = self.supervised_data[k] - self.output_layer[k]
                delta_z = self.output_layer[k]*(1-self.output_layer[k])
                delta_v = self.hidden_layer[j]
                self.w_kj[j][k] += eta*delta_J*delta_z*delta_v
    
    def input_ad(self):  
        u = Symbol("u")
        hfunction = self.hfunction
        diff_hf = self.diff_hf 
        
        eta = self.eta
        for i in range(len(self.input_layer)):
            for j in range(len(self.hidden_layer)):
                hidden_error = self.set_hidden_error(j)
                delta_y = self.hidden_layer[j]*(1-self.hidden_layer[j])
                delta_u = self.input_layer[i]
                self.w_ji[i][j] += eta*hidden_error*delta_y*delta_u
                
    def simulate(self, idata, sdata, eta):
        self.eta = eta
        self.thidden = np.array([])
        self.toutput = np.array([])
        for i in range(len(idata)):
            self.supervised_function(sdata[i])
            for j in range(len(idata[i])):
                self.input_layer[j] = idata[-i-1][j]
            for j in range(len(self.hidden_layer)):
                self.input_layer[len(idata[-i-1])+j-1] = self.hidden_layer[j]
            self.calculation()
            self.output_ad()
            self.calculation()
            self.input_ad()
        return self.output_layer
    
    def main(self, idata, sdata, eta, N, i=2, h=2, o=1):
        self.set_input_layer(i+h)
        self.set_hidden_layer(h)
        self.set_output_layer(o)
        self.setup()
        self.initialize()
        return self.output_layer[0]
    
    def set_network(self, i=2, h=5, o=1):
        self.set_input_layer(i+h)
        self.set_hidden_layer(h)
        self.set_output_layer(o)
        self.setup()
        self.initialize()
    
    def main2(self, idata, sdata, eta):
        self.simulate(idata, sdata, eta)
        return self.output_layer[0]
    
    def realize(self, idata):
        for i in range(len(idata)):
            for j in range(len(idata[-i])):
                self.input_layer[j] = idata[i][j]
            for j in range(len(self.hidden_layer)):
                self.input_layer[len(idata[i])+j-1] = self.hidden_layer[j]
            self.calculation()
        return self.output_layer[0]

ゲーム理論で用いられるTit for Tatを再現してみる。二人のプレーヤーが互いにRNNで相手の行動を予測し、相手の行動に対してTit for Tatに基づいた行動を選択する。

最初の行動はRNNで指定できないので、所与となる。この初期値と裏切りに対する感応度で収束の仕方が決まる。

協調を1、裏切りを0としている。RNNの予測値は整数値でないが、p=(RNNの出力値)で次回に協調を行う。

例1:1期目に、プレーヤー1が協力、プレーヤー2が裏切り。


In [109]:
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()

idata1 = [[1, 0]]
idata2 = [[0, 1]]
sdata1 = [[0]]
sdata2 = [[1]]

for t in range(20):
    
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    
    idata1.append([sdata1[-1][0], sdata2[-1][0]])
    idata2.append([idata1[-1][1], idata1[-1][0]])
                   
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata1)
    sdata1.append([np.random.choice([1, 0], p=[n1r, 1-n1r])])
    
    sdata2.append([np.random.choice([1, 0], p=[n2r, 1-n2r])])
        
idata.append([sdata1[-1][0], sdata2[-1][0]])
print nn1.realize(idata1), nn2.realize(idata), idata1


0.020952236405 0.0540398955796 [[1, 0], [0, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]

In [ ]:
下の図より最初は交互に相手にしっぺ返しをしているがやがて両者が裏切り合うこと状態に収束する

In [111]:
p1 = []
p2 = []
for i in range(len(idata1)):
    p1.append(idata1[i][0])
for i in range(len(idata2)):
    p2.append(idata2[i][0])
plt.plot(p1)
plt.plot(p2)


Out[111]:
[<matplotlib.lines.Line2D at 0x107467250>]

例2:1期目に、プレーヤー1が協力、プレーヤー2が協力。ただし、プレーヤー2は相手の裏切りをかなり警戒している。

警戒を表すためにp=(RNNの出力値 - 0.2)とする。p<0の場合はp=0に直す。


In [112]:
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()

idata1 = [[1, 1]]
idata2 = [[1, 1]]
sdata1 = [[1]]
sdata2 = [[1]]

for t in range(20):
    
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    
    idata1.append([sdata1[-1][0], sdata2[-1][0]])
    idata2.append([idata1[-1][1], idata1[-1][0]])
                   
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata1)
    
    prob1 = n1r 
    prob2 = n2r - 0.3
    
    if prob2 < 0:
        prob2 = 0
    
    sdata1.append([np.random.choice([1, 0], p=[prob1, 1-prob1])])
    
    sdata2.append([np.random.choice([1, 0], p=[prob2, 1-prob2])])
        
idata.append([sdata1[-1][0], sdata2[-1][0]])
print nn1.realize(idata1), nn2.realize(idata), idata1


0.615854475273 0.882223771712 [[1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [1, 1], [0, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 0], [1, 1], [0, 0], [1, 1], [1, 1]]

In [113]:
p1 = []
p2 = []
for i in range(len(idata1)):
    p1.append(idata1[i][0])
for i in range(len(idata2)):
    p2.append(idata2[i][0])
plt.plot(p1)
plt.plot(p2)


Out[113]:
[<matplotlib.lines.Line2D at 0x10838df50>]

例3:次に相手の行動を完全には観測できない場合を考える。t期の相手の行動をt+1期にノイズが加わって知る。例えば、1期目に相手が協調したことを、確率90%で2期目に正しく知れるが、10%で裏切りと誤って伝わる場合である。

ノイズは20%の確率で加わるものとする。その他の条件は例1と同じにした。


In [116]:
nn1 = RNN()
nn1.set_network()
nn2 = RNN()
nn2.set_network()

idata1 = [[1, 0]]
idata2 = [[0, 1]]
sdata1 = [[0]]
sdata2 = [[1]]

for t in range(20):
    
    for i in range(10):
        nn1.main2(idata1, sdata2, 0.9)
        nn2.main2(idata2, sdata1, 0.9)
    
    idata1.append([sdata1[-1][0], np.random.choice([sdata2[-1][0], 1-sdata2[-1][0]], p=[0.8, 0.2])])
    idata2.append([sdata2[-1][0], np.random.choice([sdata1[-1][0], 1-sdata1[-1][0]], p=[0.8, 0.2])])
                   
    n1r = nn1.realize(idata1)
    n2r = nn2.realize(idata1)
    
    prob1 = n1r 
    prob2 = n2r 
    
    sdata1.append([np.random.choice([1, 0], p=[prob1, 1-prob1])])
    
    sdata2.append([np.random.choice([1, 0], p=[prob2, 1-prob2])])
        
idata.append([sdata1[-1][0], sdata2[-1][0]])
print nn1.realize(idata1), nn2.realize(idata), idata1


0.18325162225 0.229439706952 [[1, 0], [0, 1], [1, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], [0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0]]

In [117]:
p1 = []
p2 = []
for i in range(len(idata1)):
    p1.append(idata1[i][0])
for i in range(len(idata2)):
    p2.append(idata2[i][0])
plt.plot(p1)
plt.plot(p2)


Out[117]:
[<matplotlib.lines.Line2D at 0x1081db750>]

In [88]:
print sdata1, sdata2


[[1], [1], [1], [1]] [[1], [1], [1], [1]]

In [ ]:


In [78]:
j = np.array([np.array([0])])
j = np.append(j, np.array([1]))

In [79]:
j


Out[79]:
array([0, 1])

In [ ]: