In [1]:

    
%pylab
%load_ext watermark
%watermark









    



Using matplotlib backend: MacOSX
Populating the interactive namespace from numpy and matplotlib
2016-09-13T20:19:31

CPython 3.5.2
IPython 5.0.0

compiler   : GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)
system     : Darwin
release    : 15.6.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit

기본적인 구조: 하나의 게이트로 된 회로



In [2]:

    
def forward_multiply_gate(x, y):
    return x * y

x = -2
y = 3
forward_multiply_gate(x, y) # -6 이 리턴됨









    Out[2]:





-6

무작위 지역 탐색

x, y 를 랜덤하게 조금씩 변경하면서 가장 좋은 출력을 내는 값을 추적합니다



In [3]:

    
tweak_amount = 0.01
best_out = -np.inf
best_x = x
best_y = y
for k in range(100):
    x_try = x + tweak_amount * (np.random.random() * 2 - 1) # x 를 조금 변경
    y_try = y + tweak_amount * (np.random.random() * 2 - 1) # y 를 조금 변경
    out = forward_multiply_gate(x_try, y_try)
    # 현재까지 최고 값 보다 좋은 경우 이를 새로운 최고 값으로 저장합니다
    if out > best_out:
        best_out = out
        best_x = x_try
        best_y = y_try
best_x, best_y, best_out









    Out[3]:





(-1.992011587466079, 2.990067201508147, -5.9562485127065)

계산 기울기



In [4]:

    
out = forward_multiply_gate(x, y)
h = 0.00001

# x 에 대한 변화율을 계산
xph = x + h
out2 = forward_multiply_gate(xph, y)
x_derivative = (out2 - out) / h

# y 에 대한 변화율을 계산
yph = y + h
out3 = forward_multiply_gate(x, yph)
y_derivative = (out3 - out) / h

print(x_derivative, y_derivative)

step_size = 0.01
out = forward_multiply_gate(x, y)
x_new = x + step_size * x_derivative
y_new = y + step_size * y_derivative
out_new = forward_multiply_gate(x_new, y_new)
print(out_new)









    



3.000000000064062 -2.0000000000131024
-5.870599999997832

공식 기울기



In [5]:

    
x_gradient = y # 수학 공식에 의해
y_gradient = x

x_new2 = x + step_size * x_gradient
y_new2 = y + step_size * y_gradient
forward_multiply_gate(x_new2, y_new2)









    Out[5]:





-5.8706

중첩된 구조



In [6]:

    
# 덧셈 게이트
def forward_add_gate(a, b):
    return a + b

# 전체 회로
def forward_circuit(x, y, z):
    q = forward_add_gate(x, y)
    f = forward_multiply_gate(q, z)
    return f



In [7]:

    
x = -2
y = 5
z = -4
forward_circuit(x, y, z)









    Out[7]:





-12

역전파



In [8]:

    
q = forward_add_gate(x, y)
f = forward_multiply_gate(q, z)

# 입력에 대한 곱셈 게이트의 기울기
derivative_f_wrt_z = q
derivative_f_wrt_q = z
print(derivative_f_wrt_z, derivative_f_wrt_q)

# 입력에 대한 덧셈 게이트의 기울기
derivative_q_wrt_x = 1.0
derivative_q_wrt_y = 1.0

# 체인 룰
derivative_f_wrt_x = derivative_q_wrt_x * derivative_f_wrt_q
derivative_f_wrt_y = derivative_q_wrt_y * derivative_f_wrt_q
print(derivative_f_wrt_x, derivative_f_wrt_y)









    



3 -4
-4.0 -4.0



In [9]:

    
# 포스에 맞춰 입력을 조정합니다.
x_new3 = x + step_size * derivative_f_wrt_x
y_new3 = y + step_size * derivative_f_wrt_y
z_new3 = z + step_size * derivative_f_wrt_z

# 회로가 더 높은 값을 출력합니다.
q = forward_add_gate(x_new3, y_new3)
f = forward_multiply_gate(q, z_new3)
print(q, f)









    



2.92 -11.5924

계산 기울기로 체크



In [10]:

    
x_derivative = (forward_circuit(x + h, y, z) - forward_circuit(x, y, z)) / h
y_derivative = (forward_circuit(x, y + h, z) - forward_circuit(x, y, z)) / h
z_derivative = (forward_circuit(x, y, z + h) - forward_circuit(x, y, z)) / h
x_derivative, y_derivative, z_derivative









    Out[10]:





(-4.000000000026205, -3.9999999998485687, 3.000000000064062)

단일 뉴런

Unit은 회로 그림의 선에 대응합니다



In [11]:

    
class Unit(object):
    
    def __init__(self, value, grad):
        # 정방향에서 계산되는 값
        self.value = value
        # 역방향일 때 계산되는 이 유닛에 대한 회로 출력의 변화율
        self.grad = grad



In [12]:

    
class MultiplyGate(object):
    
    def forward(self, u0, u1):
        self.u0 = u0
        self.u1 = u1
        self.utop = Unit(self.u0.value * self.u1.value, 0.0)
        return self.utop
    
    def backward(self):
        # 출력 유닛의 기울기를 받아 곱셉 게이트의 자체 기울기와 곱하여(체인 룰) 입력 유닛의 기울기로 저장합니다.
        self.u0.grad += self.u1.value * self.utop.grad
        self.u1.grad += self.u0.value * self.utop.grad
        
        
class AddGate(object):
    
    def forward(self, u0, u1):
        self.u0 = u0
        self.u1 = u1
        self.utop = Unit(self.u0.value + self.u1.value, 0.0)
        return self.utop

    def backward(self):
        # 입력에 대한 덧셈 게이트의 기울기는 1 입니다
        self.u0.grad += 1 * self.utop.grad
        self.u1.grad += 1 * self.utop.grad

        
class SigmoidGate(object):
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def forward(self, u0):
        self.u0 = u0
        self.utop = Unit(self.sigmoid(self.u0.value), 0.0)
        return self.utop

    def backward(self):
        s = self.sigmoid(self.u0.value)
        self.u0.grad += (s * (1 - s)) * self.utop.grad



In [13]:

    
# 입력 유닛 생성
a = Unit(1.0, 0.0)
b = Unit(2.0, 0.0)
c = Unit(-3.0, 0.0)
x = Unit(-1.0, 0.0)
y = Unit(3.0, 0.0)

# 게이트 생성
mulg0 = MultiplyGate()
mulg1 = MultiplyGate()
addg0 = AddGate()
addg1 = AddGate()
sg0 = SigmoidGate()

# 정방향 계산
def forward_neuron():
    ax = mulg0.forward(a, x)
    by = mulg1.forward(b, y)
    axpby = addg0.forward(ax, by)
    axpbypc = addg1.forward(axpby, c)
    s = sg0.forward(axpbypc)
    return s

s = forward_neuron()

s.value









    Out[13]:





0.88079707797788231



In [14]:

    
s.grad = 1.0
sg0.backward() # axpbypc 에 기울기 저장
addg1.backward() # axpby 와 c 에 기울기 저장
addg0.backward() # ax 와 by 에 기울기 저장
mulg1.backward() # b 와 y 에 기울기 저장
mulg0.backward() # a 와 x 에 기울기 저장



In [15]:

    
a.value += step_size * a.grad
b.value += step_size * b.grad
c.value += step_size * c.grad
x.value += step_size * x.grad
y.value += step_size * y.grad

forward_neuron()

print(a.grad, b.grad, c.grad, x.grad, y.grad)
s.value









    



-0.104993585404 0.314980756211 0.104993585404 0.104993585404 0.209987170807






    Out[15]:





0.88079707797788231

기울기 확인



In [16]:

    
def forward_circuit_fast(a,b,c,x,y):
    return 1/(1 + np.exp( - (a*x + b*y + c)))

a = 1
b = 2
c = -3
x = -1
y = 3;

a_grad = (forward_circuit_fast(a+h,b,c,x,y) - forward_circuit_fast(a,b,c,x,y))/h;
b_grad = (forward_circuit_fast(a,b+h,c,x,y) - forward_circuit_fast(a,b,c,x,y))/h;
c_grad = (forward_circuit_fast(a,b,c+h,x,y) - forward_circuit_fast(a,b,c,x,y))/h;
x_grad = (forward_circuit_fast(a,b,c,x+h,y) - forward_circuit_fast(a,b,c,x,y))/h;
y_grad = (forward_circuit_fast(a,b,c,x,y+h) - forward_circuit_fast(a,b,c,x,y))/h;
print(a_grad, b_grad, c_grad, x_grad, y_grad)









    



-0.104993985206 0.314977157934 0.104993185601 0.104993185601 0.209985571575



In [ ]: