In [1]:
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard
from __future__ import print_function
from keras.utils import plot_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from sklearn import preprocessing

import keras
import matplotlib.pyplot as plt
import numpy as np
import math
import pydot
import graphviz
import pandas as pd


Using TensorFlow backend.

Data Set Information

1593 handwritten digits from around 80 persons were scanned, stretched in a rectangular box 16x16 in a gray scale of 256 values.Then each pixel of each image was scaled into a bolean (1/0) value using a fixed threshold.

Each person wrote on a paper all the digits from 0 to 9, twice. The commitment was to write the digit the first time in the normal way (trying to write each digit accurately) and the second time in a fast way (with no accuracy).

The best validation protocol for this dataset seems to be a 5x2CV, 50% Tune (Train +Test) and completly blind 50% Validation.


In [2]:
data = pd.read_csv('data/semeion.csv', sep=",", header=None)

In [21]:
data.head()


Out[21]:
0 1 2 3 4 5 6 7 8 9 ... 256 257 258 259 260 261 262 263 264 265
0 0 0 0 0 0 0 1 1 1 1 ... 1 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 1 1 1 1 1 ... 1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 1 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 1 1 1 1 ... 1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 1 ... 1 0 0 0 0 0 0 0 0 0

5 rows × 266 columns


In [4]:
data_train = data.sample(frac=0.9, random_state=42)
data_val = data.drop(data_train.index)

In [5]:
df_x_train = data_train.iloc[:,:256]
df_y_train = data_train.iloc[:,256:]

df_x_val = data_val.iloc[:,:256]
df_y_val = data_val.iloc[:,256]

In [18]:
x_train = df_x_train.values
y_train = df_y_train.values
# y_train = keras.utils.to_categorical(y_train)

x_val = df_x_val.values
y_val = df_y_val.values
# y_val = keras.utils.to_categorical(y_val)
# y_val

Neural Net


In [13]:
hidden1_dim = 12
hidden2_dim = 12

In [19]:
model = Sequential()
model.add(Dense(hidden1_dim, activation='relu', input_shape=(256,)))
model.add(Dropout(0.1))
model.add(Dense(hidden2_dim, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

model.fit(x_train, y_train, 
          batch_size=24,
          epochs=100,
          verbose=0,
          shuffle=True,
          validation_split=0.1)
score = model.evaluate(x_val, y_val)[1]
print(score)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-77651a216051> in <module>()
     16           shuffle=True,
     17           validation_split=0.1)
---> 18 score = model.evaluate(x_val, y_val)[1]
     19 print(score)

C:\ProgramData\Anaconda3\lib\site-packages\keras\models.py in evaluate(self, x, y, batch_size, verbose, sample_weight)
    987                                    batch_size=batch_size,
    988                                    verbose=verbose,
--> 989                                    sample_weight=sample_weight)
    990 
    991     def predict(self, x, batch_size=32, verbose=0):

C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py in evaluate(self, x, y, batch_size, verbose, sample_weight, steps)
   1714             sample_weight=sample_weight,
   1715             check_batch_axis=False,
-> 1716             batch_size=batch_size)
   1717         # Prepare inputs, delegate logic to `_test_loop`.
   1718         if self.uses_learning_phase and not isinstance(K.learning_phase(), int):

C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
   1409                                     output_shapes,
   1410                                     check_batch_axis=False,
-> 1411                                     exception_prefix='target')
   1412         sample_weights = _standardize_sample_weights(sample_weight,
   1413                                                      self._feed_output_names)

C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    151                             ' to have shape ' + str(shapes[i]) +
    152                             ' but got array with shape ' +
--> 153                             str(array.shape))
    154     return arrays
    155 

ValueError: Error when checking target: expected dense_6 to have shape (None, 10) but got array with shape (159, 1)