資料預處理


In [4]:
import numpy as np
import pandas as pd
import os

filepath = '/Users/mac/Desktop/Kaggle_datasets/MNIST'
filename01 = 'train.csv'
filename02 = 'test.csv'

df_Train = pd.read_csv(os.path.join(filepath, filename01))
df_Test = pd.read_csv(os.path.join(filepath, filename02))

In [5]:
#先觀察形狀
df_Train.shape


Out[5]:
(42000, 785)

In [7]:
df_Test.shape


Out[7]:
(28000, 784)

In [6]:
#看一下column排列
df_Train.head()


Out[6]:
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 785 columns


In [14]:
#feature就直接拿取pixel
train_feature = df_Train.values[:,1:]
test_feature = df_Test.values #test並沒有label欄位,所以不用改變

In [15]:
train_feature.shape


Out[15]:
(42000, 784)

In [16]:
test_feature.shape


Out[16]:
(28000, 784)

In [19]:
np.max(train_feature) #看來feature是沒有標準化過的


Out[19]:
255

In [20]:
#來標準化一下吧
train_feature = train_feature/255
test_feature = test_feature/255

In [26]:
#調整維度fit CNN模型,feature處理就到此為止
train_feature_4D = train_feature.reshape(-1,28,28,1)
test_feature_4D = test_feature.reshape(-1,28,28,1)

In [23]:
#label要做onehot encoding處理,label處理就到此為止
import keras
train_label = keras.utils.to_categorical(df_Train.values[:,0], num_classes=10)

In [25]:
train_label


Out[25]:
array([[ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  1.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]])

跑模型,先用一般的sequential CNN


In [28]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import matplotlib.pyplot as plt

def show_train_history(train_history,train,validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

    
model = Sequential()
# input: 28x28 images with 1 channels -> (28, 28, 1) tensors.
# this applies 16 convolution filters of size 3x3 each.
model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2))) #每2x2取一個Max pool
model.add(Dropout(0.25))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu')) #壓平做full connected layer
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax')) #輸出成0~9其中一種辨識

print(model.summary())

model.compile(loss='categorical_crossentropy', optimizer='adam')

train_history = model.fit(train_feature_4D, train_label, batch_size=200, validation_split=0.2, epochs=20)

######################### 訓練過程視覺化
show_train_history(train_history,'acc','val_acc')
show_train_history(train_history,'loss','val_loss')

#儲存訓練結果
model.save_weights("Savemodel/MNIST(Kaggles)_SimpleCNN.h5")
print('model saved to disk')


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_5 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 24, 24, 16)        2320      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 16)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 12, 12, 16)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 10, 10, 32)        4640      
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 8, 8, 32)          9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 4, 4, 32)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               65664     
_________________________________________________________________
dropout_6 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                1290      
=================================================================
Total params: 83,322
Trainable params: 83,322
Non-trainable params: 0
_________________________________________________________________
None
Train on 33600 samples, validate on 8400 samples
Epoch 1/10
33600/33600 [==============================] - 96s - loss: 0.6967 - val_loss: 0.1293
Epoch 2/10
33600/33600 [==============================] - 98s - loss: 0.2071 - val_loss: 0.0840
Epoch 3/10
33600/33600 [==============================] - 98s - loss: 0.1462 - val_loss: 0.0708
Epoch 4/10
33600/33600 [==============================] - 126s - loss: 0.1183 - val_loss: 0.0532
Epoch 5/10
33600/33600 [==============================] - 95s - loss: 0.1032 - val_loss: 0.0471
Epoch 6/10
33600/33600 [==============================] - 92s - loss: 0.0914 - val_loss: 0.0444
Epoch 7/10
33600/33600 [==============================] - 93s - loss: 0.0791 - val_loss: 0.0415
Epoch 8/10
33600/33600 [==============================] - 93s - loss: 0.0765 - val_loss: 0.0373
Epoch 9/10
33600/33600 [==============================] - 94s - loss: 0.0686 - val_loss: 0.0389
Epoch 10/10
33600/33600 [==============================] - 93s - loss: 0.0633 - val_loss: 0.0337
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-b655ae21606a> in <module>()
     38 model.fit(train_feature_4D, train_label, batch_size=200, validation_split=0.2, epochs=10)
     39 
---> 40 show_train_history(train_history,'acc','val_acc')
     41 show_train_history(train_history,'loss','val_loss')
     42 

NameError: name 'train_history' is not defined

In [29]:
prediction = model.predict(test_feature_4D)

In [30]:
prediction[0]


Out[30]:
array([  4.85514562e-09,   3.30018290e-10,   9.99999881e-01,
         1.91172518e-08,   4.07498583e-11,   1.92173100e-12,
         8.36084023e-13,   1.09267717e-07,   6.89015023e-09,
         9.35201135e-12], dtype=float32)

In [52]:
#用np.argmax把softmax的東西轉換回來
np.argmax(prediction[0])


Out[52]:
2

In [55]:
len(prediction)


Out[55]:
28000

In [56]:
ans = []
for i in range(len(prediction)):
    ans.append(np.argmax(prediction[i]))

In [59]:
df_ans = pd.DataFrame(ans)
df_ans.to_csv('MNIST_ans.csv')

現學現賣,用Functional Model做CNN


In [ ]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import matplotlib.pyplot as plt

def show_train_history(train_history,train,validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

from keras.models import Model

# First, define the vision modules
digit_input = Input(shape=(-1, 28, 28, 1))
x = Conv2D(64, (3, 3))(digit_input)
x = Conv2D(64, (3, 3))(x)
x = MaxPooling2D((2, 2))(x)
out = Flatten()(x)

vision_model = Model(digit_input, out)

# Then define the tell-digits-apart model
digit_a = Input(shape=(-1, 28, 28, 1))
digit_b = Input(shape=(-1, 28, 28, 1))

# The vision model will be shared, weights and all
out_a = vision_model(digit_a)
out_b = vision_model(digit_b)

concatenated = keras.layers.concatenate([out_a, out_b])
out = Dense(1, activation='sigmoid')(concatenated)


classification_model = Model([digit_a, digit_b], out)

print(model.summary())

model.compile(loss='categorical_crossentropy', optimizer='adam')

train_history = model.fit(train_feature_4D, train_label, batch_size=200, validation_split=0.2, epochs=10)

######################### 訓練過程視覺化
show_train_history(train_history,'acc','val_acc')
show_train_history(train_history,'loss','val_loss')

#儲存訓練結果
model.save_weights("Savemodel/MNIST(Kaggles)_ComplexCNN.h5")
print('model saved to disk')