Implement the network with standard matrix operations.
The nonlinear activations are temporarily removed, so every layer is purely affine.
4 layers (input, two hidden layers, output).
Trained to fit a linear (first-degree) function.

Input: 1-dimensional data
First hidden layer: 2 units
Second hidden layer: 3 units
Output: 1-dimensional data

Right multiplication is used throughout: x * w (row vector times weight matrix).
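
A minimal sketch of the forward pass the cells below implement; with the nonlinearity removed every step is a plain affine map, and the shapes follow the layer sizes listed above (the standalone variables here are only for illustration):

import numpy as np
x = np.array([[1.0]])                                # one 1-dimensional sample, shape (1, 1)
w1, b1 = np.random.rand(1, 2), np.random.rand(1, 2)
w2, b2 = np.random.rand(2, 3), np.random.rand(1, 3)
w3, b3 = np.random.rand(3, 1), np.random.rand(1, 1)
s1 = np.dot(x, w1) + b1                              # (1, 1) -> (1, 2)
s2 = np.dot(s1, w2) + b2                             # (1, 2) -> (1, 3)
y_hat = np.dot(s2, w3) + b3                          # (1, 3) -> (1, 1), the prediction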


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
# Define common helper functions (the sigmoid pair is kept for when the
# nonlinearity is restored; it is not used in this linear version)
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
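
These helpers are not called anywhere in this linear version of the notebook; a quick sanity check of their values, should they be reused later (assuming the cell above has been run):

sigmoid(0.0)        # 0.5
sigmoid_prime(0.0)  # 0.25, the maximum of the derivative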

In [3]:
# Prepare training data: y = -x plus Gaussian noise (std 5),
# i.e. a line with slope -1 and intercept 0
x_data = np.linspace(-20, 20, 100, dtype=np.double)[:, np.newaxis]
y_data = -x_data.copy() + np.random.normal(0, 5, x_data.shape)
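
A quick look at the generated data (an extra cell for illustration, relying on the matplotlib import above):

plt.scatter(x_data, y_data, s=5)
plt.title('training data: y = -x + noise')
plt.show()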

In [4]:
# Learning rate
learning_rate = 0.001

In [5]:
# Initial parameter values, uniform random in [0, 1)
b1 = np.random.rand(1, 2)   # bias of first hidden layer
w1 = np.random.rand(1, 2)   # weights: 1 input -> 2 units

b2 = np.random.rand(1, 3)   # bias of second hidden layer
w2 = np.random.rand(2, 3)   # weights: 2 -> 3 units

b3 = np.random.rand(1, 1)   # bias of output layer
w3 = np.random.rand(3, 1)   # weights: 3 -> 1 output

In [6]:
index = 0

batch = 30   # mini-batch size

# accumulators for the summed gradients of one mini-batch
temp_gradient_b3 = np.zeros(b3.shape)
temp_gradient_w3 = np.zeros(w3.shape)

temp_gradient_b2 = np.zeros(b2.shape)
temp_gradient_w2 = np.zeros(w2.shape)

temp_gradient_b1 = np.zeros(b1.shape)
temp_gradient_w1 = np.zeros(w1.shape)

while index < x_data.size * 30:
    # roughly 30 passes over the training set, one random sample per step

    # pick one training sample at random (stochastic sampling)
    map_index = np.random.randint(x_data.size)
    
    #Feedforward
    x1 = x_data[map_index]        # input sample, shape (1,)
    s1 = np.dot(x1, w1) + b1      # shape (1, 2)
    a1 = s1                       # identity activation (nonlinearity removed)
    
    x2 = a1                       # shape (1, 2)
    s2 = np.dot(x2, w2) + b2      # shape (1, 3)
    a2 = s2                       # identity activation
    
    x3 = a2                       # shape (1, 3)
    s3 = np.dot(x3, w3) + b3      # shape (1, 1)
    a3 = s3                       # identity activation
    
    y_Hat = a3                    # prediction, shape (1, 1)
    y_Real = y_data[map_index]    # target, shape (1,)
    
    #Backpropagation
    #===== BP for b3, w3 =====
    # squared-error loss (y_Hat - y_Real)**2, so dL/dy_Hat = 2 * (y_Hat - y_Real)
    delta_3_out = 2 * (y_Hat - y_Real)
    delta_3_in = delta_3_out * 1                          # identity activation, derivative = 1
    
    gradient_b3 = delta_3_in                              # shape (1, 1)
    gradient_w3 = np.dot(np.transpose(x3), delta_3_in)    # shape (3, 1)
    
    #===== BP for b2, w2 =====
    delta_2_out = np.dot(delta_3_in, np.transpose(w3))
    delta_2_in = delta_2_out * 1                          # identity activation, derivative = 1
    
    gradient_b2 = delta_2_in                              # shape (1, 3)
    gradient_w2 = np.dot(np.transpose(x2), delta_2_in)    # shape (2, 3)
    
    #===== BP for b1, w1 =====
    delta_1_out = np.dot(delta_2_in, np.transpose(w2))
    delta_1_in = delta_1_out * 1                          # identity activation, derivative = 1
    
    gradient_b1 = delta_1_in                              # shape (1, 2)
    gradient_w1 = np.dot(np.transpose(x1), delta_1_in)    # broadcasts to shape (1, 2)
    
    # Accumulate gradients over the mini-batch
    temp_gradient_b3 = temp_gradient_b3 + gradient_b3
    temp_gradient_w3 = temp_gradient_w3 + gradient_w3
    temp_gradient_b2 = temp_gradient_b2 + gradient_b2
    temp_gradient_w2 = temp_gradient_w2 + gradient_w2
    temp_gradient_b1 = temp_gradient_b1 + gradient_b1
    temp_gradient_w1 = temp_gradient_w1 + gradient_w1
    
    if (index + 1) % batch == 0:
        # Update parameters with the averaged mini-batch gradient
        b3 = b3 - temp_gradient_b3 * learning_rate / (batch * 1.0)
        w3 = w3 - temp_gradient_w3 * learning_rate / (batch * 1.0)
    
        b2 = b2 - temp_gradient_b2 * learning_rate / (batch * 1.0)
        w2 = w2 - temp_gradient_w2 * learning_rate / (batch * 1.0)
        
        b1 = b1 - temp_gradient_b1 * learning_rate / (batch * 1.0)
        w1 = w1 - temp_gradient_w1 * learning_rate / (batch * 1.0)
        
        # reset the accumulators for the next mini-batch
        temp_gradient_b3 = np.zeros(b3.shape)
        temp_gradient_w3 = np.zeros(w3.shape)

        temp_gradient_b2 = np.zeros(b2.shape)
        temp_gradient_w2 = np.zeros(w2.shape)

        temp_gradient_b1 = np.zeros(b1.shape)
        temp_gradient_w1 = np.zeros(w1.shape)  
    
    index = index + 1
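
Since every layer is linear, the whole network collapses to a single affine function of the input. A minimal post-training check (an extra cell, not part of the original notebook) recovers the effective slope and intercept, which should move toward the data-generating values of roughly -1 and 0 if training has converged:

w_eff = np.dot(np.dot(w1, w2), w3)              # effective slope, shape (1, 1)
b_eff = np.dot(np.dot(b1, w2) + b2, w3) + b3    # effective intercept, shape (1, 1)
print(w_eff, b_eff)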

In [7]:
#Run the trained network over the whole input range
new_y = np.zeros(y_data.shape)

index = 0
while index < x_data.size:
    
    x1 = x_data[index]            # input sample, shape (1,)
    s1 = np.dot(x1, w1) + b1      # shape (1, 2)
    a1 = s1
    
    x2 = a1
    s2 = np.dot(x2, w2) + b2      # shape (1, 3)
    a2 = s2
    
    x3 = a2
    s3 = np.dot(x3, w3) + b3      # shape (1, 1)
    a3 = s3
    
    y_Hat = a3                    # prediction, shape (1, 1)

    new_y[index] = y_Hat
    
    index = index + 1
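
The per-sample loop above can equivalently be written as one batched matrix expression over all of x_data at once; a minimal vectorized sketch using the same trained parameters:

new_y_vec = np.dot(np.dot(np.dot(x_data, w1) + b1, w2) + b2, w3) + b3   # shape (100, 1), matches new_y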

In [8]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

ax.plot(x_data, y_data, 'y-', lw=2)  # original noisy data
ax.plot(x_data, new_y, 'r-', lw=2)   # network prediction

plt.show()