In [20]:
    
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
%matplotlib inline
    
In [2]:
    
def AND(x1, x2):
    w1, w2, theta = 0.5, 0.5, 0.7
    tmp = x1*w1 + x2*w2
    if tmp <= theta:
        return 0
    elif tmp > theta:
        return 1
    
In [2]:
    
AND(1, 1)
    
    Out[2]:
1
In [3]:
    
AND(1, 0)
    
    Out[3]:
0
In [8]:
    
def AND2(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.7
    tmp = np.sum(w*x)+b
    if tmp <= 0:
        return 0
    else:
        return 1
    
In [9]:
    
AND2(1,1)
    
    Out[9]:
1
In [10]:
    
AND2(0,1)
    
    Out[10]:
0
In [11]:
    
AND2(0,0)
    
    Out[11]:
0
In [4]:
    
def NAND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([-0.5, -0.5])
    b = 0.7
    tmp = np.sum(w*x)+b
    if tmp <= 0:  # <= 0, consistent with the other gates (the boundary case never occurs for 0/1 inputs)
        return 0
    else:
        return 1
    
In [5]:
    
NAND(0,0)
    
    Out[5]:
1
In [6]:
    
NAND(0,1)
    
    Out[6]:
1
In [7]:
    
NAND(1,1)
    
    Out[7]:
0
In [8]:
    
NAND(1,0)
    
    Out[8]:
1
In [9]:
    
def OR(x1, x2):
    x = np.array([x1,x2])
    w = np.array([0.5, 0.5])
    b = -0.2
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
    
In [10]:
    
OR(0,0)
    
    Out[10]:
0
In [11]:
    
OR(0,1)
    
    Out[11]:
1
In [12]:
    
OR(1,0)
    
    Out[12]:
1
In [13]:
    
OR(1,1)
    
    Out[13]:
1
In [1]:
    
# The XOR problem cannot be solved by a single-layer perceptron.
# But removing the "single straight line" constraint makes it possible -> use a multi-layer perceptron.
    
In [2]:
    
def XOR(x1, x2):
    s1 = NAND(x1,x2)
    s2 = OR(x1, x2)
    y = AND(s1, s2)
    return y
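
A quick sanity check: evaluating XOR over all four input pairs reproduces the XOR truth table, confirming that the two-layer combination of NAND, OR, and AND works.
In [ ]:
    
for xs in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(xs, '->', XOR(*xs))

(0, 0) -> 0
(0, 1) -> 1
(1, 0) -> 1
(1, 1) -> 0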
    
In [22]:
    
def step_function(x):
    return np.array(x > 0, dtype=int)  # dtype=np.int was removed in modern NumPy; use the builtin int
    
In [26]:
    
np.arange?
    
In [29]:
    
x = np.arange(-5.0, 5.0, 0.1)
# previously written as np.arange([-5.0, 5.0, 0.1]); the brackets are unnecessary, since arange takes start, stop, step as separate arguments
y = step_function(x)
plt.ylim(-0.1, 1.1)
plt.plot(x, y)
plt.show()
    
    
In [3]:
    
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
    
In [38]:
    
x = np.array([-5.0, 5.0, 0.1])
    
In [39]:
    
sigmoid(x)
    
    Out[39]:
array([0.00669285, 0.99330715, 0.52497919])
In [42]:
    
x = np.arange(-5, 5, 0.1)
    
In [43]:
    
y = sigmoid(x)
    
In [44]:
    
plt.plot(x, y)
plt.ylim(-0.1, 1.1)
plt.show()
    
    
In [45]:
    
def relu(x):
    return np.maximum(0, x)
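
relu is defined here but not exercised anywhere below; a minimal check of its behavior (np.maximum broadcasts the scalar 0 against each element):
In [ ]:
    
relu(np.array([-1.0, 0.0, 2.0]))  # -> array([0., 0., 2.])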
    
In [47]:
    
a = np.array([1,2,3,4])
print(a)
[1 2 3 4]
    
In [48]:
    
np.ndim(a)
    
    Out[48]:
1
In [49]:
    
# shape is a tuple even for a 1-D array
a.shape
    
    Out[49]:
(4,)
In [50]:
    
a.shape[0]
    
    Out[50]:
4
In [51]:
    
b = np.array([[1,2],[3,4], [5,6]])
print(b)
[[1 2]
 [3 4]
 [5 6]]
    
In [52]:
    
np.ndim(b)
    
    Out[52]:
2
In [54]:
    
b.shape
    
    Out[54]:
(3, 2)
In [55]:
    
a = np.array([[1,2], [3,4]])
b = np.array([[5,6], [7,8]])
    
In [56]:
    
a.shape
    
    Out[56]:
(2, 2)
In [57]:
    
b.shape
    
    Out[57]:
(2, 2)
In [58]:
    
np.dot(a,b)
    
    Out[58]:
array([[19, 22],
       [43, 50]])
In [4]:
    
def identity_function(x):
    return x
    
In [5]:
    
# 3-layer neural network
def init_network():
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])
    
    return network
def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    
    return y
network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)
[0.31682708 0.69627909]
    
In [62]:
    
# The network above used the identity function at the output layer. This time, let's try the softmax function!
    
In [63]:
    
a = np.array([0.3, 2.9, 4.0])
    
In [64]:
    
exp_a = np.exp(a)
    
In [65]:
    
print(exp_a)
[ 1.34985881 18.17414537 54.59815003]
    
In [66]:
    
sum_exp_a = np.sum(exp_a)
    
In [67]:
    
print(sum_exp_a)
74.1221542101633
    
In [69]:
    
y = exp_a / sum_exp_a
    
In [70]:
    
print(y)
[0.01821127 0.24519181 0.73659691]
    
In [6]:
    
def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    
    return y
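
One caveat: np.exp overflows for large scores (np.exp(1000) is inf, so this naive softmax returns nan). The standard remedy is to subtract the maximum before exponentiating, which leaves the result mathematically unchanged. A minimal sketch (softmax_stable is a name chosen here for illustration):
In [ ]:
    
def softmax_stable(a):
    c = np.max(a)                 # shifting by the max prevents overflow in np.exp
    exp_a = np.exp(a - c)         # softmax(a) == softmax(a - c)
    return exp_a / np.sum(exp_a)

softmax_stable(np.array([1010, 1000, 990]))  # the naive version yields nan for these inputs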
    
In [7]:
    
import sys, os
sys.path.append(os.pardir)
    
In [9]:
    
from dataset.mnist import load_mnist
    
In [11]:
    
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
    
In [12]:
    
print(x_train.shape)
(60000, 784)
    
In [13]:
    
print(t_train.shape)
(60000,)
    
In [14]:
    
print(x_test.shape)
(10000, 784)
    
In [15]:
    
print(t_test.shape)
(10000,)
    
In [17]:
    
from PIL import Image
    
In [18]:
    
def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()
    
In [19]:
    
img = x_train[0]
print(img)
    
    
In [20]:
    
label = t_train[0]
print(label)
5
    
In [21]:
    
print(img.shape)
(784,)
    
In [22]:
    
img = img.reshape(28, 28)  # reshape the flat 784-vector back into a 28 x 28 image
    
In [23]:
    
print(img.shape)
(28, 28)
    
In [24]:
    
img_show(img)
    
In [29]:
    
import pickle
    
In [30]:
    
def get_data():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test
def init_network():
    with open('./ch03/sample_weight.pkl', 'rb') as f:
        network = pickle.load(f)
        
    return network
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)
    
    return y
    
In [34]:
    
x, t = get_data()
network = init_network()
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p = np.argmax(y)
    if p == t[i]:
#         print(p)
        accuracy_cnt += 1
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
Accuracy:0.9352
    
In [35]:
    
# Normalization: transforming data into a specific range
# Preprocessing: applying a transformation to the input data before feeding it to the model
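
As a concrete illustration of normalization (example pixel values; this mirrors what load_mnist(normalize=True) does to each pixel):
In [ ]:
    
raw = np.array([0, 128, 255], dtype=np.uint8)  # example pixel values
normalized = raw.astype(np.float32) / 255.0    # scale 0-255 into 0.0-1.0
print(normalized)                              # [0.        0.5019608 1.       ]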
    
In [36]:
    
# Batch: input data bundled into one unit (x[0] and y[0] hold the 0th image and its inference result)
    
In [37]:
    
x, _ = get_data()
network = init_network()
W1, W2, W3 = network['W1'], network['W2'], network['W3']
    
In [38]:
    
x.shape
    
    Out[38]:
In [39]:
    
x[0].shape
    
    Out[39]:
In [40]:
    
W1.shape
    
    Out[40]:
In [41]:
    
W2.shape
    
    Out[41]:
In [42]:
    
W3.shape
    
    Out[42]:
In [47]:
    
x, t = get_data()
network = init_network()
batch_size = 100
accuracy_cnt = 0
for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])
    
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
Accuracy:0.9352
    
In [51]:
    
x[1]
    
    Out[51]:
In [52]:
    
x[1:2]
    
    Out[52]:
In [53]:
    
x
    
    Out[53]:
In [54]:
    
p
    
    Out[54]:
In [3]:
    
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)
    
In [4]:
    
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# the correct label is 2
    
In [10]:
    
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# estimates '2' as the most likely class
    
In [9]:
    
np.array(t)
    
    Out[9]:
array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
In [8]:
    
mean_squared_error(np.array(y), np.array(t))
    
    Out[8]:
0.09750000000000003
In [11]:
    
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
# estimates '7' as the most likely class
    
In [12]:
    
mean_squared_error(np.array(y), np.array(t))
    
    Out[12]:
0.5975
The first estimate has the smaller error, so it can be judged closer to the correct answer.
With cross-entropy error, the output at the correct label determines the entire value.
In [14]:
    
def cross_entropy_error(y, t):
    delta = 1e-7
    return -np.sum(t*np.log(y + delta))
# delta is added because np.log(0) evaluates to negative infinity, which would break the computation
    
In [15]:
    
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# the correct label is 2
    
In [16]:
    
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# estimates '2' as the most likely class
    
In [17]:
    
cross_entropy_error(np.array(y), np.array(t))
    
    Out[17]:
0.510825457099338
In [18]:
    
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
# estimates '7' as the most likely class
    
In [19]:
    
cross_entropy_error(np.array(y), np.array(t))
    
    Out[19]:
2.302584092994546
In [21]:
    
sys.path.append(os.pardir)
    
In [22]:
    
from dataset.mnist import load_mnist
    
In [23]:
    
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    
    
In [24]:
    
print(x_train.shape)
(60000, 784)
    
In [25]:
    
print(t_train.shape)
(60000, 10)
    
In [26]:
    
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)  # randomly pick batch_size indices out of train_size
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
    
In [27]:
    
def cross_entropy_error(y, t):
    # batch version for one-hot encoded t
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    batch_size = y.shape[0]
    return -np.sum(t*np.log(y)) / batch_size
    
In [28]:
    
def cross_entropy_error(y, t):
    # batch version for when t holds class labels rather than one-hot vectors
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
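
A quick check, reusing the earlier example values: with the correct label 2 given in index form, this version reproduces -log(0.6), matching the one-hot result above.
In [ ]:
    
y = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]])
t = np.array([2])            # class label instead of a one-hot vector
cross_entropy_error(y, t)    # ~0.5108, i.e. -log(0.6)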
    
In [29]:
    
# Central difference (centered difference): compute the difference across points just before and after x
    
In [31]:
    
def numerical_diff(f, x):
    h = 1e-4
    return (f(x+h) - f(x-h)) / (2*h)
    
In [32]:
    
def function_1(x):
    return 0.01*x**2 + 0.1*x
    
In [33]:
    
x = np.arange(0.0, 20.0, 0.1)
    
In [34]:
    
y = function_1(x)
    
In [35]:
    
plt.xlabel('x')
plt.ylabel('f(x)')
plt.plot(x, y)
plt.show()
    
    
In [36]:
    
numerical_diff(function_1, 5)
    
    Out[36]:
0.1999999999990898
In [37]:
    
numerical_diff(function_1, 10)
    
    Out[37]:
0.2999999999986347
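As a cross-check, the analytic derivative of function_1 is 0.02x + 0.1, so the true slopes at x = 5 and x = 10 are exactly 0.2 and 0.3; the numerical estimates above agree to about nine decimal places.
In [ ]:
    
def d_function_1(x):
    return 0.02*x + 0.1  # analytic derivative of 0.01*x**2 + 0.1*x

d_function_1(5), d_function_1(10)  # (0.2, 0.3) up to float rounding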
In [38]:
    
def function_2(x):
    return x[0]**2 + x[1]**2
    
In [41]:
    
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)  # create an array of zeros with the same shape as x
    
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        
        x[idx] = tmp_val - h
        fxh2 = f(x)
        
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
        
    return grad
    
In [42]:
    
numerical_gradient(function_2, np.array([3.0, 4.0]))
    
    Out[42]:
array([6., 8.])
In [45]:
    
numerical_gradient(function_2, np.array([0.0, 2.0]))
    
    Out[45]:
array([0., 4.])
In [46]:
    
numerical_gradient(function_2, np.array([3.0, 0.0]))
    
    Out[46]:
array([6., 0.])
In [48]:
    
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x  # note: x aliases init_x, so init_x is modified in place by the updates below
    
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
    
In [49]:
    
## exercise: use gradient descent to find the minimum of f(x0, x1) = x0^2 + x1^2
    
In [50]:
    
def function_2(x):
    return x[0]**2 + x[1]**2
    
In [51]:
    
init_x = np.array([-3.0, 4.0])
    
In [52]:
    
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)
    
    Out[52]:
array([-6.11110793e-10,  8.14814391e-10])
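The learning rate matters here: too large and the updates blow up, too small and they barely move. A quick experiment, using fresh copies of the start point since gradient_descent modifies its init_x argument in place:
In [ ]:
    
gradient_descent(function_2, init_x=np.array([-3.0, 4.0]), lr=10.0, step_num=100)    # too large: diverges to huge values
gradient_descent(function_2, init_x=np.array([-3.0, 4.0]), lr=1e-10, step_num=100)   # too small: stays near (-3.0, 4.0)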
In [56]:
    
from common.functions import *
from common.gradient import numerical_gradient
    
In [57]:
    
# two-layer neural network
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        
    
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        
        return y
    
    
    def loss(self, x, t):
        y = self.predict(x)
        
        return cross_entropy_error(y, t)
    
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
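
A quick shape check of the freshly initialized parameters (hidden_size=100 is illustrative; the MNIST run below uses 50):
In [ ]:
    
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape)  # (784, 100)
print(net.params['b1'].shape)  # (100,)
print(net.params['W2'].shape)  # (100, 10)
print(net.params['b2'].shape)  # (10,)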
    
In [60]:
    
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # make files in the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.two_layer_net import TwoLayerNet
    
In [61]:
    
# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# hyperparameters
iters_num = 10000  # set the number of iterations appropriately
train_size = x_train.shape[0]
batch_size = 100   # mini-batch size
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
# iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)
for i in range(iters_num):
    # get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)
    
    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    
    # record training progress
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    # compute accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
# plot the results
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
    
    
    
In [ ]: