In [1]:
# Windows-only hack: append the graphviz directory to PATH
import os
for path in os.environ['PATH'].split(os.pathsep):
    if path.endswith("Library\\bin"):
        os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')

In [2]:
import keras
from keras.models import Sequential
from PIL import Image
import numpy as np


Using TensorFlow backend.

In [3]:
import gzip
import pickle
with gzip.open("../Week02/mnist.pkl.gz", 'rb') as f:
    train_set, validation_set, test_set = pickle.load(f, encoding='latin1')
train_X, train_y = train_set
validation_X, validation_y = validation_set
test_X, test_y = test_set


train_Y = np.eye(10)[train_y]
test_Y = np.eye(10)[test_y]
validation_Y = np.eye(10)[validation_y]

# or
# from keras.datasets import mnist
# from keras.utils import np_utils
# (train_X, train_y), (test_X, test_y) = mnist.load_data()
# train_Y = np_utils.to_categorical(train_y, 10)
# test_Y = np_utils.to_categorical(test_y, 10)
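
np.eye(10) is the 10x10 identity matrix, so indexing it with an array of labels picks one identity row per label, which is exactly a one-hot encoding:

np.eye(10)[[3, 7]]
# array([[ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
#        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.]])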

Logistic regression


In [4]:
from keras.layers import Dense, Activation
model = Sequential()
model.add(Dense(units=10, input_dim=784))
model.add(Activation('softmax'))
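
This two-layer stack is multinomial logistic regression: one affine map from 784 pixels to 10 logits, followed by softmax. Equivalently, the activation can be folded into the Dense layer; a one-line sketch of the same model:

model = Sequential()
model.add(Dense(units=10, input_dim=784, activation='softmax'))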

In [5]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

In [6]:
from IPython.display import SVG, display
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))


Out[6]:
[Model graph: dense_1_input (InputLayer, (None, 784)) → dense_1 (Dense, (None, 784) → (None, 10)) → activation_1 (Activation, (None, 10) → (None, 10))]
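
If graphviz is not available, model.summary() prints roughly the same information as plain text (the Dense layer has 784*10 + 10 = 7850 parameters):

model.summary()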

In [7]:
model.fit(train_X, train_Y, validation_data=(validation_X, validation_Y), batch_size=128, epochs=15)


Train on 50000 samples, validate on 10000 samples
Epoch 1/15
50000/50000 [==============================] - 0s - loss: 1.3414 - acc: 0.6857 - val_loss: 0.8683 - val_acc: 0.8318
Epoch 2/15
50000/50000 [==============================] - 0s - loss: 0.7797 - acc: 0.8291 - val_loss: 0.6367 - val_acc: 0.8615
Epoch 3/15
50000/50000 [==============================] - 0s - loss: 0.6348 - acc: 0.8509 - val_loss: 0.5439 - val_acc: 0.8743
Epoch 4/15
50000/50000 [==============================] - 0s - loss: 0.5644 - acc: 0.8614 - val_loss: 0.4922 - val_acc: 0.8825
Epoch 5/15
50000/50000 [==============================] - 0s - loss: 0.5214 - acc: 0.8681 - val_loss: 0.4590 - val_acc: 0.8887
Epoch 6/15
50000/50000 [==============================] - 0s - loss: 0.4919 - acc: 0.8736 - val_loss: 0.4358 - val_acc: 0.8909
Epoch 7/15
50000/50000 [==============================] - 0s - loss: 0.4700 - acc: 0.8773 - val_loss: 0.4182 - val_acc: 0.8938
Epoch 8/15
50000/50000 [==============================] - 0s - loss: 0.4530 - acc: 0.8801 - val_loss: 0.4044 - val_acc: 0.8959
Epoch 9/15
50000/50000 [==============================] - 0s - loss: 0.4393 - acc: 0.8828 - val_loss: 0.3932 - val_acc: 0.8982
Epoch 10/15
50000/50000 [==============================] - 0s - loss: 0.4280 - acc: 0.8853 - val_loss: 0.3840 - val_acc: 0.8997
Epoch 11/15
50000/50000 [==============================] - 0s - loss: 0.4183 - acc: 0.8867 - val_loss: 0.3762 - val_acc: 0.9005
Epoch 12/15
50000/50000 [==============================] - 0s - loss: 0.4101 - acc: 0.8890 - val_loss: 0.3693 - val_acc: 0.9017
Epoch 13/15
50000/50000 [==============================] - 0s - loss: 0.4029 - acc: 0.8902 - val_loss: 0.3637 - val_acc: 0.9029
Epoch 14/15
50000/50000 [==============================] - 0s - loss: 0.3965 - acc: 0.8918 - val_loss: 0.3585 - val_acc: 0.9036
Epoch 15/15
50000/50000 [==============================] - 0s - loss: 0.3908 - acc: 0.8931 - val_loss: 0.3540 - val_acc: 0.9038
Out[7]:
<keras.callbacks.History at 0x1bb0dc82860>
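
fit returns the keras.callbacks.History object shown above; its .history dict keeps the per-epoch numbers from the log. A sketch, assuming you re-run fit and keep the return value:

history = model.fit(train_X, train_Y,
                    validation_data=(validation_X, validation_Y),
                    batch_size=128, epochs=15)
history.history['val_acc']   # 15 validation accuracies, one per epoch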

In [8]:
# predict the first 20 samples of test_X
model.predict_classes(test_X[:20])


20/20 [==============================] - 0s
Out[8]:
array([7, 2, 1, 0, 4, 1, 4, 9, 6, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4], dtype=int64)
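
predict_classes is simply an argmax over the softmax outputs, so the same array can be obtained with:

model.predict(test_X[:20]).argmax(axis=-1)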

In [9]:
# compare against the true labels
test_y[:20]


Out[9]:
array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4], dtype=int64)
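
Nineteen of the twenty predictions match; only index 8 is wrong (predicted 6, true label 5).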

In [10]:
# check the test accuracy
model.evaluate(test_X, test_Y)


 8032/10000 [=======================>......] - ETA: 0s
Out[10]:
[0.36359533268213273, 0.90269999999999995]
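
evaluate returns the loss followed by each metric named in compile, so this is [test loss, test accuracy]: about 90.3%, consistent with the validation accuracy above.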

Q

  • change the optimizer to "adam"
  • change the optimizer to keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True); a sketch of both swaps follows below
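
Both are one-line changes to the compile step; a minimal sketch (rebuild the model first if you want to start training from fresh weights):

from keras.optimizers import SGD

# option 1: Adam with Keras default settings
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# option 2: SGD with momentum and Nesterov acceleration
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True),
              metrics=['accuracy'])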

Build a convolutional model

Our network architecture from earlier:

  • convolution 2d kernel=(3,3), filters=32
  • relu
  • max pool
  • convolution 2d kernel=(3,3), filters=64
  • relu
  • max pool
  • dense units=1024
  • relu
  • dropout (rate=0.8) # skip this layer for now
  • dense units = 10
  • softmax

Try building this network and then train it.

The first few lines can be written like this:

from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Reshape
model = Sequential()
model.add(Reshape((28, 28, 1), input_shape=(784,) ))
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation="relu"))
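
One way to finish it, as a sketch: the Flatten layer, the 'adam' optimizer, and epochs=5 are my choices, not prescribed by the exercise; the course answer is in q_keras_cnn.py, loaded below.

from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Reshape, Flatten, Dropout

model = Sequential()
model.add(Reshape((28, 28, 1), input_shape=(784,)))
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation="relu"))
model.add(MaxPool2D())                        # (28, 28, 32) -> (14, 14, 32)
model.add(Conv2D(filters=64, kernel_size=(3,3), padding='same', activation="relu"))
model.add(MaxPool2D())                        # (14, 14, 64) -> (7, 7, 64)
model.add(Flatten())                          # -> 7*7*64 = 3136
model.add(Dense(units=1024, activation="relu"))
# model.add(Dropout(rate=0.8))                # skipped for now, as in the list above
model.add(Dense(units=10, activation="softmax"))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(train_X, train_Y, validation_data=(validation_X, validation_Y),
          batch_size=128, epochs=5)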

In [11]:
# reference answer
#%load q_keras_cnn.py