In [1]:
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard
from __future__ import print_function
from keras.utils import plot_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from sklearn import preprocessing
import keras
import matplotlib.pyplot as plt
import numpy as np
import math
import pydot
import graphviz
import pandas as pd
1593 handwritten digits from around 80 persons were scanned, stretched in a rectangular box 16x16 in a gray scale of 256 values. Then each pixel of each image was scaled into a boolean (1/0) value using a fixed threshold.
Each person wrote on a paper all the digits from 0 to 9, twice. The commitment was to write the digit the first time in the normal way (trying to write each digit accurately) and the second time in a fast way (with no accuracy).
The best validation protocol for this dataset seems to be a 5x2CV, 50% Tune (Train + Test) and completely blind 50% Validation.
In [2]:
# Read the Semeion table from disk. The file carries no header row, so
# header=None makes pandas label the columns with integers.
data = pd.read_csv(
    'data/semeion.csv',
    header=None,
    sep=",",
)
In [21]:
# Quick look at the first five rows of the raw table.
data.head(n=5)
Out[21]:
In [4]:
# Draw a reproducible random 90% of the rows for training (fixed seed), then
# keep whatever rows were not drawn as the held-out validation set.
data_train = data.sample(random_state=42, frac=0.9)
data_val = data.drop(data_train.index, axis=0)
In [5]:
# Features are the first 256 columns (one per pixel of the 16x16 image);
# everything from column 256 onward is the label block.
df_x_train = data_train.iloc[:, :256]
df_y_train = data_train.iloc[:, 256:]
df_x_val = data_val.iloc[:, :256]
# Slice with `256:` rather than the scalar `256` so the validation labels keep
# the same 2-D, multi-column layout as the training labels. A scalar index
# returns only the first label column as a 1-D Series, which no longer matches
# the targets the model is trained on.
df_y_val = data_val.iloc[:, 256:]
In [18]:
# Convert the feature/label frames to plain NumPy arrays for Keras.
# The labels are passed through as extracted: no keras.utils.to_categorical
# conversion is applied to either split.
x_train, y_train = df_x_train.values, df_y_train.values
x_val, y_val = df_x_val.values, df_y_val.values
In [13]:
# Widths (number of units) of the first and second hidden Dense layers.
hidden1_dim, hidden2_dim = 12, 12
In [19]:
# Network: 256 inputs -> two small ReLU hidden layers, each followed by 10%
# dropout -> 10-way softmax output.
model = Sequential([
    Dense(hidden1_dim, activation='relu', input_shape=(256,)),
    Dropout(0.1),
    Dense(hidden2_dim, activation='relu'),
    Dropout(0.1),
    Dense(10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

# Train silently (verbose=0); validation_split reserves a tenth of the
# training arrays for per-epoch validation.
model.fit(
    x_train,
    y_train,
    batch_size=24,
    epochs=100,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# With a single compiled metric, evaluate() yields [loss, accuracy];
# index 1 is the accuracy on the held-out validation split.
eval_results = model.evaluate(x_val, y_val)
score = eval_results[1]
print(score)