In [1]:
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np


Using gpu device 0: GeForce GTX 760

In [ ]:


In [15]:
from preprocess_data_lib import *
(X_train, y_train, X_valid, y_valid, X_test, y_test) = getData(pct=1, cast=True)
y_train = np_utils.to_categorical(y_train)

In [16]:
y_train


Out[16]:
array([[ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  1., ...,  0.,  0.,  0.]])

In [17]:
(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape, X_test.shape, y_test.shape)


Out[17]:
((37800, 784), (37800, 10), (4200, 784), (4200,), (28000, 784), (28000,))

In [18]:
mean = np.std(X_train)
X_train -= mean
X_test -= mean

In [20]:
input_dim = X_train.shape[1]
nb_classes = y_train.shape[1]

In [21]:
input_dim, nb_classes


Out[21]:
(784, 10)

In [22]:
X_train_reshaped = X_train.reshape(X_train.shape[0], 1, 28, 28)
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, 28, 28)
X_valid_reshaped = X_valid.reshape(X_valid.shape[0], 1, 28, 28)

In [ ]:

Modèle 1


In [75]:
# Here's a Deep Dumb MLP (DDMLP)
model = Sequential()
model.add(Dense(input_dim, 128, init='lecun_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(Dense(128, 128, init='lecun_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(Dense(128, nb_classes, init='lecun_uniform'))
model.add(Activation('relu'))

In [76]:
model.compile(loss='mse', optimizer='rmsprop')

In [77]:
model.fit(X_train, y_train, nb_epoch=30, batch_size=8, validation_split=0.05, show_accuracy=False, verbose=0)

In [78]:
accuracy_score(y_valid, model.predict_classes(X_valid))


128/4200 [..............................] - ETA: 0s256/4200 [>.............................] - ETA: 0s384/4200 [=>............................] - ETA: 0s512/4200 [==>...........................] - ETA: 0s640/4200 [===>..........................] - ETA: 0s768/4200 [====>.........................] - ETA: 0s896/4200 [=====>........................] - ETA: 0s1024/4200 [======>.......................] - ETA: 0s1152/4200 [=======>......................] - ETA: 0s1280/4200 [========>.....................] - ETA: 0s1408/4200 [=========>....................] - ETA: 0s1536/4200 [=========>....................] - ETA: 0s1664/4200 [==========>...................] - ETA: 0s1792/4200 [===========>..................] - ETA: 0s1920/4200 [============>.................] - ETA: 0s2048/4200 [=============>................] - ETA: 0s2176/4200 [==============>...............] - ETA: 0s2304/4200 [===============>..............] - ETA: 0s2432/4200 [================>.............] - ETA: 0s2560/4200 [=================>............] - ETA: 0s2688/4200 [==================>...........] - ETA: 0s2816/4200 [===================>..........] - ETA: 0s2944/4200 [====================>.........] - ETA: 0s3072/4200 [====================>.........] - ETA: 0s3200/4200 [=====================>........] - ETA: 0s3328/4200 [======================>.......] - ETA: 0s3456/4200 [=======================>......] - ETA: 0s3584/4200 [========================>.....] - ETA: 0s3712/4200 [=========================>....] - ETA: 0s3840/4200 [==========================>...] - ETA: 0s3968/4200 [===========================>..] - ETA: 0s4096/4200 [============================>.] - ETA: 0s4200/4200 [==============================] - 0s     
Out[78]:
0.096190476190476187

Ce model donne 0.97171 avec les paramètres

nb_epoch=10, batch_size=16, validation_split=0.1

Ce model donne 0.97400 avec les paramètres

nb_epoch=30, batch_size=8, validation_split=0.05

Modèle 2


In [12]:
#Yet another model
model1 = Sequential()

# first convolutional layer
model1.add(Convolution2D(32,1,2,2))
model1.add(Activation('relu'))

# second convolutional layer
model1.add(Convolution2D(48, 32, 2, 2))
model1.add(Activation('relu')) 
model1.add(MaxPooling2D(poolsize=(2,2)))

# third convolutional layer
model1.add(Convolution2D(32, 48, 2, 2))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(poolsize=(2,2)))

# convert convolutional filters to flatt so they can be feed to 
# fully connected layers
model1.add(Flatten())

# first fully connected layer
model1.add(Dense(32*6*6, 128, init='lecun_uniform'))
model1.add(Activation('relu'))
model1.add(Dropout(0.25))

# second fully connected layer
model1.add(Dense(128, 128, init='lecun_uniform'))
model1.add(Activation('relu'))
model1.add(Dropout(0.25))

# last fully connected layer which output classes
model1.add(Dense(128, 10, init='lecun_uniform'))
model1.add(Activation('softmax'))

In [13]:
# setting sgd optimizer parameters
sgd = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
model1.compile(loss='mse', optimizer=sgd)

In [14]:
model1.fit(X_train_reshaped, y_train, nb_epoch=1, batch_size=1000, validation_split=0.1, show_accuracy=True, 
           verbose=1)


Train on 34020 samples, validate on 3780 samples
Epoch 0
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-14-6dc67d9017aa> in <module>()
      1 model1.fit(X_train_reshaped, y_train, nb_epoch=1, batch_size=1000, validation_split=0.1, show_accuracy=True, 
----> 2            verbose=1)

/home/issam/anaconda/lib/python2.7/site-packages/Keras-0.0.1-py2.7.egg/keras/models.pyc in fit(self, X, y, batch_size, nb_epoch, verbose, validation_split, validation_data, shuffle, show_accuracy)
    132 
    133                 if show_accuracy:
--> 134                     loss, acc = self._train_with_acc(X_batch, y_batch)
    135                 else:
    136                     loss = self._train(X_batch, y_batch)

/home/issam/anaconda/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    604                         self.fn.nodes[self.fn.position_of_error],
    605                         self.fn.thunks[self.fn.position_of_error],
--> 606                         storage_map=self.fn.storage_map)
    607                 else:
    608                     # For the c linker We don't have access from

/home/issam/anaconda/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    593         t0_fn = time.time()
    594         try:
--> 595             outputs = self.fn()
    596         except Exception:
    597             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuElemwise. Input dimension mis-match. Input 1 (indices start at 0) has shape[1] == 1, but the output's size on that axis is 10.
Apply node that caused the error: GpuElemwise{Sub}[(0, 0)](GpuSoftmaxWithBias.0, GpuFromHost.0)
Inputs types: [CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix)]
Inputs shapes: [(1000, 10), (1000, 1)]
Inputs strides: [(10, 1), (1, 0)]
Inputs values: ['not shown', 'not shown']

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [124]:
accuracy_score(y_valid, model1.predict_classes(X_valid_reshaped))


128/4200 [..............................] - ETA: 0s256/4200 [>.............................] - ETA: 0s384/4200 [=>............................] - ETA: 0s512/4200 [==>...........................] - ETA: 0s640/4200 [===>..........................] - ETA: 0s768/4200 [====>.........................] - ETA: 0s896/4200 [=====>........................] - ETA: 0s1024/4200 [======>.......................] - ETA: 0s1152/4200 [=======>......................] - ETA: 0s1280/4200 [========>.....................] - ETA: 0s1408/4200 [=========>....................] - ETA: 0s1536/4200 [=========>....................] - ETA: 0s1664/4200 [==========>...................] - ETA: 0s1792/4200 [===========>..................] - ETA: 0s1920/4200 [============>.................] - ETA: 0s2048/4200 [=============>................] - ETA: 0s2176/4200 [==============>...............] - ETA: 0s2304/4200 [===============>..............] - ETA: 0s2432/4200 [================>.............] - ETA: 0s2560/4200 [=================>............] - ETA: 0s2688/4200 [==================>...........] - ETA: 0s2816/4200 [===================>..........] - ETA: 0s2944/4200 [====================>.........] - ETA: 0s3072/4200 [====================>.........] - ETA: 0s3200/4200 [=====================>........] - ETA: 0s3328/4200 [======================>.......] - ETA: 0s3456/4200 [=======================>......] - ETA: 0s3584/4200 [========================>.....] - ETA: 0s3712/4200 [=========================>....] - ETA: 0s3840/4200 [==========================>...] - ETA: 0s3968/4200 [===========================>..] - ETA: 0s4096/4200 [============================>.] - ETA: 0s4200/4200 [==============================] - 0s     
Out[124]:
0.11142857142857143

In [126]:
a = model1.predict_classes(X_valid_reshaped)


128/4200 [..............................] - ETA: 0s256/4200 [>.............................] - ETA: 0s384/4200 [=>............................] - ETA: 0s512/4200 [==>...........................] - ETA: 0s640/4200 [===>..........................] - ETA: 0s768/4200 [====>.........................] - ETA: 0s896/4200 [=====>........................] - ETA: 0s1024/4200 [======>.......................] - ETA: 0s1152/4200 [=======>......................] - ETA: 0s1280/4200 [========>.....................] - ETA: 0s1408/4200 [=========>....................] - ETA: 0s1536/4200 [=========>....................] - ETA: 0s1664/4200 [==========>...................] - ETA: 0s1792/4200 [===========>..................] - ETA: 0s1920/4200 [============>.................] - ETA: 0s2048/4200 [=============>................] - ETA: 0s2176/4200 [==============>...............] - ETA: 0s2304/4200 [===============>..............] - ETA: 0s2432/4200 [================>.............] - ETA: 0s2560/4200 [=================>............] - ETA: 0s2688/4200 [==================>...........] - ETA: 0s2816/4200 [===================>..........] - ETA: 0s2944/4200 [====================>.........] - ETA: 0s3072/4200 [====================>.........] - ETA: 0s3200/4200 [=====================>........] - ETA: 0s3328/4200 [======================>.......] - ETA: 0s3456/4200 [=======================>......] - ETA: 0s3584/4200 [========================>.....] - ETA: 0s3712/4200 [=========================>....] - ETA: 0s3840/4200 [==========================>...] - ETA: 0s3968/4200 [===========================>..] - ETA: 0s4096/4200 [============================>.] - ETA: 0s4200/4200 [==============================] - 0s     

In [127]:
a


Out[127]:
array([1, 1, 1, ..., 1, 1, 1])

Modèle 3


In [47]:
# Here is another model
model2 = Sequential()
model2.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model2.add(Activation('relu'))
model2.add(Convolution2D(32, 32, 3, 3))
model2.add(Activation('relu'))
model2.add(MaxPooling2D(poolsize=(2, 2)))
model2.add(Dropout(0.25))

model2.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 
model2.add(Activation('relu'))
model2.add(Convolution2D(64, 64, 3, 3)) 
model2.add(Activation('relu'))
model2.add(MaxPooling2D(poolsize=(2, 2)))
model2.add(Dropout(0.25))

model2.add(Flatten())
model2.add(Dense(64*8*8, 256))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))

model2.add(Dense(256, nb_classes))
model2.add(Activation('relu'))

In [48]:
#sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
#model.compile(loss='categorical_crossentropy', optimizer=sgd)
# we'll use MSE (mean squared error) for the loss, and RMSprop as the optimizer
#model.compile(loss='mse', optimizer='rmsprop')

In [49]:
model2.compile(loss='mse', optimizer='rmsprop')

In [50]:
model2.fit(X_train_reshaped, y_train, nb_epoch=1, batch_size=16, validation_split=0.1, show_accuracy=False, 
           verbose=0)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-50-39e0126bdab7> in <module>()
      1 model1.fit(X_train_reshaped, y_train, nb_epoch=1, batch_size=16, validation_split=0.1, show_accuracy=False, 
----> 2            verbose=0)

/home/issam/anaconda/lib/python2.7/site-packages/Keras-0.0.1-py2.7.egg/keras/models.pyc in fit(self, X, y, batch_size, nb_epoch, verbose, validation_split, validation_data, shuffle, show_accuracy)
    134                     loss, acc = self._train_with_acc(X_batch, y_batch)
    135                 else:
--> 136                     loss = self._train(X_batch, y_batch)
    137 
    138                 # logging

/home/issam/anaconda/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    604                         self.fn.nodes[self.fn.position_of_error],
    605                         self.fn.thunks[self.fn.position_of_error],
--> 606                         storage_map=self.fn.storage_map)
    607                 else:
    608                     # For the c linker We don't have access from

/home/issam/anaconda/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    593         t0_fn = time.time()
    594         try:
--> 595             outputs = self.fn()
    596         except Exception:
    597             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuCorrMM shape inconsistency:
  bottom shape: 16 32 30 30
  weight shape: 3 32 3 3
  top shape: 16 1 28 28 (expected 16 3 28 28)

Apply node that caused the error: GpuCorrMM_gradInputs{valid, (1, 1)}(GpuContiguous.0, GpuContiguous.0)
Inputs types: [CudaNdarrayType(float32, 4D), CudaNdarrayType(float32, 4D)]
Inputs shapes: [(3, 32, 3, 3), (16, 1, 28, 28)]
Inputs strides: [(288, 9, 3, 1), (784, 0, 28, 1)]
Inputs values: ['not shown', 'not shown']

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [ ]:
accuracy_score(y_valid, model.predict_classes(X_valid))

In [ ]:


In [ ]:


In [14]:
preds = model1.predict_classes(X_test, verbose=0)

In [ ]:
def write_preds(preds, fname):
    pd.DataFrame({"ImageId": list(range(1,len(preds)+1)), "Label": preds}).to_csv(fname, index=False, header=True)
write_preds(preds, "../../result/keras-mlp.csv")