In [32]:
# Import modules
import os
import pandas as pd
import numpy as np
from skimage.io import imread
from lasagne import layers
from lasagne.nonlinearities import softmax
from nolearn.lasagne import NeuralNet, BatchIterator
# Set path
# Base directory (under the user's home) that the later cells use to read the
# label CSVs and image folders and to write the submission file.
path = ''.join((os.path.expanduser('~'), '/workspace/julia'))
In [28]:
# Define functions
def read_data(typeData, labelsInfo, imageSize, path):
    """Load the images listed in ``labelsInfo`` as flat grayscale rows.

    Parameters
    ----------
    typeData : str
        Prefix of the image folder; files are read from
        ``<path>/<typeData>Resized32/<ID>.Bmp``.
    labelsInfo : table with an ``'ID'`` column
        One image is read per entry, in column order.
    imageSize : int
        Number of pixels per image; every image is reshaped to this length.
    path : str
        Base directory that contains the image folder.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(labelsInfo.shape[0], imageSize)``; row ``i`` holds
        the pixels of the ``i``-th listed image.
    """
    pixels = np.zeros((labelsInfo.shape[0], imageSize))
    # use specially created 32 x 32 images
    file_names = [
        '{0}/{1}Resized32/{2}.Bmp'.format(path, typeData, image_id)
        for image_id in labelsInfo['ID']
    ]
    for row, file_name in enumerate(file_names):
        # NOTE(review): newer scikit-image releases spell this keyword
        # `as_gray` -- confirm against the installed version before upgrading.
        gray = imread(file_name, as_grey = True)
        pixels[row, :] = np.reshape(gray, (1, imageSize))
    return pixels
def fit_model(reshaped_train_x, y, image_width,
              image_height, reshaped_test_x):
    """Train a convolutional network on the training images and label the test images.

    The network is a nolearn ``NeuralNet`` made of these layers, in order:
    input, conv (32 filters, 5x5), max-pool (2x2), dropout (p=0.2),
    conv (64 filters, 5x5), max-pool (2x2), dropout (p=0.2),
    conv (128 filters, 5x5), dense (500 units), dense (62 units, softmax).
    It is fitted for at most 100 epochs in batches of 100.

    Parameters
    ----------
    reshaped_train_x : array
        Training images, expected as
        ``(n_samples, 1, image_height, image_width)``.
    y : array
        Training labels, one per training image.
    image_width, image_height : int
        Spatial size of each image; they define the input layer shape.
    reshaped_test_x : array
        Images to predict, in the same layout as ``reshaped_train_x``.

    Returns
    -------
    The value of ``net.predict(reshaped_test_x)``.
    """
    net = NeuralNet(
        layers = [
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('dropout1', layers.DropoutLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('dropout2', layers.DropoutLayer),
            ('conv3', layers.Conv2DLayer),
            ('hidden4', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # Build the input layer from the caller's dimensions instead of a
        # hard-coded 32 x 32, so the parameters are actually honoured.
        # NOTE(review): the fixed 5x5 filters and 2x2 pools were chosen for
        # 32 x 32 inputs -- verify the stack still fits before changing size.
        input_shape = (None, 1, image_height, image_width),
        conv1_num_filters = 32, conv1_filter_size = (5, 5),
        pool1_pool_size = (2, 2),
        dropout1_p = 0.2,
        conv2_num_filters = 64, conv2_filter_size = (5, 5),
        pool2_pool_size = (2, 2),
        dropout2_p = 0.2,
        conv3_num_filters = 128, conv3_filter_size = (5, 5),
        hidden4_num_units = 500,
        # NOTE(review): 62 output units presumably cover digits plus upper-
        # and lower-case letters -- confirm against the label set.
        output_num_units = 62, output_nonlinearity = softmax,
        update_learning_rate = 0.01,
        update_momentum = 0.9,
        batch_iterator_train = BatchIterator(batch_size = 100),
        batch_iterator_test = BatchIterator(batch_size = 100),
        # Labels arrive as arbitrary integer codes, so let the net encode them.
        use_label_encoder = True,
        regression = False,
        max_epochs = 100,
        verbose = 1,
    )
    net.fit(reshaped_train_x, y)
    prediction = net.predict(reshaped_test_x)
    return prediction
In [11]:
# Set variables
imageSize = 1024 # 32 x 32
image_width = image_height = int(imageSize ** 0.5)
# The later reshape needs a square image, so fail early if it is not.
assert image_width * image_height == imageSize, 'imageSize must be a perfect square'
labelsInfoTrain = pd.read_csv('{0}/trainLabels.csv'.format(path))
labelsInfoTest = pd.read_csv('{0}/sampleSubmission.csv'.format(path))
# Load dataset
xTrain = read_data('train', labelsInfoTrain, imageSize, path)
xTest = read_data('test', labelsInfoTest, imageSize, path)
# Encode every class label as the integer code of its character. A list
# comprehension is used instead of map() because on Python 3 map() returns an
# iterator, which np.array() would wrap as a single object rather than
# converting it to an array of codes.
yTrain = np.array([ord(label) for label in labelsInfoTrain['Class']])
In [12]:
# Preprocess (Normalize) data
# Scale and centre BOTH sets with the training statistics, so that a given raw
# pixel value maps to the same network input at training and prediction time.
# (Previously the test set was normalized with its own std/mean.)
train_std = xTrain.std(axis = None)
xTrain /= train_std
# The mean is taken after scaling, matching the order applied to xTest below.
train_mean = xTrain.mean()
xTrain -= train_mean
xTest /= train_std
xTest -= train_mean
In [17]:
# Reshape data
# Turn each flat pixel row into a single-channel image of shape
# (1, image_height, image_width) and store it as float32.
train_x_reshaped = np.reshape(
    xTrain, (xTrain.shape[0], 1, image_height, image_width)
).astype(np.float32)
test_x_reshaped = np.reshape(
    xTest, (xTest.shape[0], 1, image_height, image_width)
).astype(np.float32)
In [29]:
# Train and test data
# Fit the network on the training images, then predict a label code for each
# test image.
predict = fit_model(
    reshaped_train_x = train_x_reshaped,
    y = yTrain,
    image_width = image_width,
    image_height = image_height,
    reshaped_test_x = test_x_reshaped,
)
In [30]:
# Convert for submission
# Map each predicted integer code back to its character. A list is built
# explicitly because on Python 3 map() returns a lazy iterator, which cannot be
# assigned as a DataFrame column.
yTest = [chr(code) for code in predict]
labelsInfoTest['Class'] = yTest
labelsInfoTest.to_csv('{0}/subzero.csv'.format(path), index = False)
This gives a score of 0.66492 (results may vary from run to run).