notebook.community

Edit and run



In [18]:

    
import tensorflow as tf
import numpy as np
from sklearn.cross_validation import train_test_split
from PIL import Image
from matplotlib.pyplot import imshow
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation, Dropout, Conv2D, MaxPooling2D
from keras.utils import np_utils



In [2]:

    
# import data
data = np.genfromtxt('train.csv', delimiter=',', skip_header = 1)
print(data.shape)









    



(42000, 785)



In [19]:

    
# train, test, validation split
Y = data[:,0]
X = data[:,1:]

# normalize independant variables
X = X/255

# reshaping to get image structure
X_reshaped = X.reshape((42000,28,28,1))
print(X_reshaped.shape)

# one hot label encoding
Y_encode = np.eye((np.unique(Y)).size)[Y.astype(int)]

x_train, x_test, y_train, y_test = train_test_split(X_reshaped, Y_encode, test_size = 0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size = 0.5)

print(x_train.shape)
print(x_test.shape)
print(x_val.shape)
print(y_train.shape)
print(y_test.shape)
print(y_val.shape)









    



(42000, 28, 28, 1)
(29400, 28, 28, 1)
(6300, 28, 28, 1)
(6300, 28, 28, 1)
(29400, 10)
(6300, 10)
(6300, 10)



In [20]:

    
# test X reshape
%matplotlib inline
test = (X[101,:]).reshape(28, 28)
im = Image.fromarray(test)
imshow(test)









    Out[20]:





<matplotlib.image.AxesImage at 0x241d2bad828>



In [23]:

    
# test encoding
print(Y[1])
print(Y_encode[1])









    



0.0
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]



In [67]:

    
conv_net = Sequential()

# convolution layer 1
conv_net.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28,28,1)))
conv_net.add(MaxPooling2D(pool_size=(3,3)))
conv_net.add(Dropout(0.5))

print(conv_net.output.shape)

# convolution layer 2
conv_net.add(Conv2D(64, (3, 3), activation='relu'))
conv_net.add(MaxPooling2D(pool_size=(3,3)))
conv_net.add(Dropout(0.5))

print(conv_net.output.shape)

# fully connected
conv_net.add(Flatten())
conv_net.add(Dense(128, activation='relu'))
conv_net.add(Dropout(0.5))
conv_net.add(Dense(10, activation='softmax'))

conv_net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])









    



(?, 9, 9, 32)
(?, 2, 2, 64)



In [32]:

    
conv_net.fit(x_train, y_train,batch_size=32, nb_epoch=10, verbose=1)









    



C:\Users\abjilani\AppData\Local\Continuum\Anaconda3\envs\dlnd-tf-lab\lib\site-packages\keras\models.py:837: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
  warnings.warn('The `nb_epoch` argument in `fit` '






    



Epoch 1/10
29400/29400 [==============================] - 15s - loss: 0.1413 - acc: 0.9495      
Epoch 2/10
29400/29400 [==============================] - 14s - loss: 0.0597 - acc: 0.9786     
Epoch 3/10
29400/29400 [==============================] - 15s - loss: 0.0478 - acc: 0.9835     
Epoch 4/10
29400/29400 [==============================] - 14s - loss: 0.0415 - acc: 0.9860     
Epoch 5/10
29400/29400 [==============================] - 15s - loss: 0.0369 - acc: 0.9871     
Epoch 6/10
29400/29400 [==============================] - 16s - loss: 0.0337 - acc: 0.9883     
Epoch 7/10
29400/29400 [==============================] - 16s - loss: 0.0314 - acc: 0.9893     
Epoch 8/10
29400/29400 [==============================] - 15s - loss: 0.0307 - acc: 0.9896     
Epoch 9/10
29400/29400 [==============================] - 17s - loss: 0.0290 - acc: 0.9900     
Epoch 10/10
29400/29400 [==============================] - 19s - loss: 0.0277 - acc: 0.9907      - ETA: 16s - loss: 0.0284 - acc: 0.9909






    Out[32]:





<keras.callbacks.History at 0x241d6368be0>



In [39]:

    
score = conv_net.evaluate(x_test, y_test, verbose=0)
print("%s: %.2f%%" % (conv_net.metrics_names[1], score[1]*100))









    



acc: 99.69%



In [40]:

    
score = conv_net.evaluate(x_val, y_val, verbose=0)
print("%s: %.2f%%" % (conv_net.metrics_names[1], score[1]*100))









    



acc: 99.71%



In [44]:

    
sub_data = np.genfromtxt('test.csv', delimiter=',', skip_header = 1)
print(sub_data.shape)









    



(28000, 784)



In [45]:

    
sub_data = sub_data/255
sub_data_reshaped = sub_data.reshape((28000,28,28,1))



In [47]:

    
predictions = conv_net.predict(sub_data_reshaped, verbose=1)









    



28000/28000 [==============================] - 5s



In [48]:

    
print(predictions.shape)









    



(28000, 10)



In [55]:

    
final = np.argmax(predictions, axis=1)
print(final.shape)



In [60]:

    
np.savetxt(X = final, fname='predictions.csv',delimiter=',', newline='\n', header='Label')



In [61]:

    
print(final.dtype)









    



int64