In [1]:
import os, sys
import pandas as pd
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline
# Resolve the user's home directory. os.path.expanduser("~") returns $HOME when
# it is set and otherwise falls back to the platform's own lookup, so HOME is
# always a string (os.getenv("HOME") returns None when the variable is unset,
# which made the concatenation below raise TypeError).
HOME = os.path.expanduser("~")
# Directory holding the MNIST CSV files. The trailing slash is kept so the
# value is unchanged for cells that append a file name to it.
DATA_DIR = HOME + '/data/mnist/'
In [3]:
# One way to show an image: build a PIL image from a random RGB array.
# `r` stays a float array in [0, 1) (np.random.rand), which is the form the
# matplotlib cell uses. PIL's RGB images use 8-bit unsigned channels, so the
# samples are scaled to 0-255 and cast to uint8 before conversion; passing the
# float64 array directly makes PIL misread the underlying buffer.
r = np.random.rand(28, 28, 3)
# For a (H, W, 3) uint8 array the mode is inferred as 'RGB'.
img = Image.fromarray((r * 255).astype(np.uint8))
img.show()
In [4]:
# Another way: draw the same random array on the current matplotlib axes.
# 'nearest' interpolation keeps the individual pixels sharp.
plt.gca().imshow(r, interpolation='nearest')
plt.show()
In [5]:
# Load the training CSV from the data directory into a DataFrame.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
In [21]:
from keras.utils.np_utils import to_categorical
# Number of target classes (digits 0-9). Passed explicitly to to_categorical
# so the train and validation one-hot matrices always have the same width,
# even if one split happens not to contain the highest label.
NUM_CLASSES = 10

# First 80% of the rows are used for training, the rest for validation
# (rows are taken in file order; no shuffling is applied here).
split_ind = int(0.8 * train.shape[0])

# Column 0 is used as the label; every remaining column is a pixel value,
# divided by 255 to rescale it.
X = train.iloc[:, 1:].values / 255.0
# 4-D view (samples, 1, 28, 28) consumed by the convolutional models.
X_reshape = np.reshape(X, (X.shape[0], 1, 28, 28))
X_train = X_reshape[:split_ind]
X_val = X_reshape[split_ind:]

# Integer labels for each split, plus their one-hot encodings.
Y_train = train.iloc[:split_ind, 0].values
Y_val = train.iloc[split_ind:, 0].values
y_train_ohe = to_categorical(Y_train, NUM_CLASSES)
y_val_ohe = to_categorical(Y_val, NUM_CLASSES)
In [7]:
# Plot the first few digit images in a square grid.
s = 16  # number of images to show
# Smallest square grid that fits `s` images (4 x 4 for s = 16).
grid = int(np.ceil(np.sqrt(s)))
# One 28 x 28 image per row of X.
X_plot = np.reshape(X, (X.shape[0], 28, 28))
fig = plt.figure()
# min(...) guards against asking for more images than there are rows.
for i in range(min(s, X_plot.shape[0])):
    ax = fig.add_subplot(grid, grid, i + 1)
    ax.imshow(X_plot[i] * 255.0, interpolation='nearest', cmap='gray')
    # Hide tick marks; they carry no information for an image thumbnail.
    ax.set_xticks([])
    ax.set_yticks([])
In [18]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
In [22]:
# Baseline CNN: one 3x3 convolution (32 filters), 2x2 max pooling, then a
# 10-way softmax on the flattened feature maps.
# input_shape=(1, 28, 28) matches the per-sample shape of X_train / X_val.
model = Sequential([
    Convolution2D(32, 3, 3,
                  border_mode='same',
                  activation='relu',
                  input_shape=(1, 28, 28)),
    MaxPooling2D(pool_size=(2, 2), strides=None),
    Flatten(),
    Dense(10, activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# Last expression of the cell: the object returned by fit() is the cell output.
model.fit(X_train, y_train_ohe,
          validation_data=(X_val, y_val_ohe),
          batch_size=128,
          nb_epoch=10)
Out[22]:
In [ ]:
In [23]:
# Fully connected network on the flat 784-value rows: two 4096-unit ReLU
# layers, each followed by 50% dropout, then a 10-way softmax output whose
# weights carry an L2 regularizer.
model = Sequential([
    Dense(4096, activation='relu', input_dim=784),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax', W_regularizer='l2'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
In [26]:
# Train the current `model` on the flat (un-reshaped) rows of X, using the same
# split index as the one-hot label arrays so inputs and targets stay aligned.
model.fit(
    X[:split_ind],
    y_train_ohe,
    validation_data=(X[split_ind:], y_val_ohe),
    nb_epoch=20,
    batch_size=32,
)
In [28]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Random-forest baseline on the flat pixel rows.
# The forest is fitted on the integer labels (Y_train), not the one-hot matrix:
# a 2-D indicator target makes scikit-learn treat the task as 10 independent
# binary outputs, so a prediction row may contain no class or several classes,
# and accuracy_score then reports exact-row-match (subset) accuracy.
rf = RandomForestClassifier(n_estimators = 100)
rf.fit(X[:split_ind], Y_train)
preds = rf.predict(X[split_ind:])
# Plain multiclass accuracy on the validation rows (cell output).
accuracy_score(Y_val, preds)
Out[28]:
In [31]:
# Deeper CNN: three stacked 3x3 convolutions (32 -> 64 -> 128 filters), one
# 2x2 max pooling step, 50% dropout, then a 10-way softmax on the flattened
# feature maps. input_shape=(1, 28, 28) matches the per-sample shape of X_train.
model = Sequential([
    Convolution2D(32, 3, 3,
                  border_mode='same',
                  activation='relu',
                  input_shape=(1, 28, 28)),
    Convolution2D(64, 3, 3, border_mode='same', activation='relu'),
    Convolution2D(128, 3, 3, border_mode='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=None),
    Dropout(0.5),
    Flatten(),
    Dense(10, activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# Last expression of the cell: the object returned by fit() is the cell output.
model.fit(X_train, y_train_ohe,
          validation_data=(X_val, y_val_ohe),
          batch_size=32,
          nb_epoch=10)
Out[31]:
In [32]:
# CNN with a fully connected head: three stacked 3x3 convolutions
# (32 -> 64 -> 128 filters), 2x2 max pooling and dropout, followed by
# 512- and 256-unit ReLU layers (each with 50% dropout) and a 10-way softmax
# output whose weights carry an L2 regularizer.
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode = 'same', input_shape = (1, 28, 28), activation = 'relu'))
model.add(Convolution2D(64, 3, 3, border_mode = 'same', activation = 'relu'))
model.add(Convolution2D(128, 3, 3, border_mode = 'same', activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2,2), strides = None))
model.add(Dropout(0.5))
model.add(Flatten())
# No input_dim on this layer: only the first layer of the stack declares the
# input shape, and this layer receives the flattened conv features rather than
# 784 raw pixels, so the former input_dim = 784 was a misleading leftover.
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax', W_regularizer = 'l2'))
model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics = ['accuracy'])
# Last expression of the cell: the object returned by fit() is the cell output.
model.fit(X_train, y_train_ohe, batch_size = 32, nb_epoch = 10, validation_data = (X_val, y_val_ohe))
Out[32]:
In [33]:
# Scratch expression; its value (3) is the cell output and nothing else uses it.
1 + 2
Out[33]:
In [37]:
# Load the test CSV. This line must run: `test` is used immediately below and
# again when the submission frame is built, so leaving it commented out makes
# the notebook fail with NameError on a fresh kernel.
test = pd.read_csv(DATA_DIR + 'test.csv')
# Same preprocessing as the training pixels: divide by 255, then reshape each
# row to (1, 28, 28) for the convolutional model.
X_test = test.values / 255.0
X_test_reshape = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))
In [38]:
# Run the reshaped test images through whichever network is currently bound to
# `model`; `p` receives the raw prediction array.
p = model.predict(x=X_test_reshape)
# Optional: persist the trained weights next to the data.
#model.save_weights(DATA_DIR + 'mnist_conv.h5')
In [56]:
# Build the submission table: one row per test image.
# 'Label' is the index of the largest score in each prediction row, computed
# in one vectorised call. The previous map(lambda ...) yields a lazy iterator
# on Python 3, which cannot be used as a DataFrame column.
# 'ImageId' is the 1-based row number (assumes `test` has the default 0..n-1 index).
df = pd.DataFrame({'ImageId': test.index.values + 1, 'Label': np.argmax(p, axis=1)})
In [57]:
# Write the submission table to the data directory, leaving out the
# DataFrame's own index column.
df.to_csv(os.path.join(DATA_DIR, 'mnist_conv.csv'), index=False)
In [ ]: