In [1]:
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import datasets
from sknn.mlp import Classifier, Layer
from skimage import io
from skimage import data, segmentation, filters, color, img_as_float, img_as_ubyte, exposure, feature, measure, morphology
from skimage.color import rgb2gray
from skimage.morphology import square
from skimage.feature import hog
import cv2
import numpy as np
from time import time
from glob import glob
import os

In [2]:
# Display floats with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

In [3]:
### Plan2 ###

In [4]:
# Load the persisted feature matrix and label vector, converting each to a
# numpy array with an explicit dtype in the same step.
# NOTE(review): joblib.load unpickles its input -- only load trusted files.
# NOTE(review): "lables" spelling presumably matches the file on disk -- confirm.
features = np.array(joblib.load("./mldata/features_1200.mat"), 'int16')
labels = np.array(joblib.load("./mldata/lables_1200.mat"), 'int')

# Start the timer for the normalisation step that follows.
t0 = time()
def scale(X, eps = 0.001):
    """Min-max scale every column of ``X`` into the range [0, 1).

    Each column is shifted by its minimum and divided by its value range
    (``max - min``) plus ``eps``.  The previous implementation divided by
    ``max + eps``, which only yields the intended range when the column
    minimum is 0; for such columns the result is unchanged.

    Parameters
    ----------
    X : array-like
        Data whose columns (axis 0 reductions) are scaled independently.
    eps : float, optional
        Small positive constant added to the denominator so that constant
        columns (``max == min``) do not cause a division by zero.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X`` with per-column values >= 0 and
        strictly below 1 when ``eps`` is positive.
    """
    col_min = np.min(X, axis = 0)
    col_max = np.max(X, axis = 0)
    # Divide by the column range, not the column maximum, so that columns
    # with a non-zero (or negative) minimum still land in [0, 1).
    return (X - col_min) / (col_max - col_min + eps)

features = features.astype("float32")
features = scale(features)

print "escape time : ", round(time()-t0, 3), "s"

# scale the data to the range [0, 1] and then construct the training
# and testing splits
(trainX, testX, trainY, testY) = train_test_split(features, labels, test_size = 0.1)
print "the shape of training set %s rows, %s columns" %(trainX.shape[0], trainX.shape[1])
print "the shape of test set %s rows, %s columns" %(testX.shape[0], testX.shape[1])
print "the range of training set : %s ~ %s" %(trainX.min(),trainX.max())
print "the range of test set : %s ~ %s" %(testX.min(),testX.max())


escape time :  0.076 s
the shape of training set 5400 rows, 784 columns
the shape of test set 600 rows, 784 columns
the range of training set : 0.0 ~ 0.999996
the range of test set : 0.0 ~ 0.999996

In [5]:
# Network topology: one 300-unit "Rectifier" hidden layer followed by a
# "Softmax" output layer.
hidden_layer = Layer("Rectifier", units=300)
output_layer = Layer("Softmax")

# Momentum-based training on mini-batches of 25 samples for 10 iterations;
# valid_size=0.0 means no part of the training data is set aside here.
clf = Classifier(
    layers=[hidden_layer, output_layer],
    learning_rule='momentum',
    learning_rate=0.02,
    learning_momentum=0.9,
    batch_size=25,
    n_iter=10,
    n_stable=10,
    valid_size=0.0,
    verbose=1,
)

In [6]:
t0 = time()

clf.fit(trainX, trainY)
print "escape time : ", round(time()-t0, 3), "s"

y_pred = clf.predict(testX)

print "accuracy score : %s" %(accuracy_score(testY, y_pred))
print "classification report : "
print classification_report(testY, y_pred)
print "confusion matrix : "
print confusion_matrix(testY, y_pred)


escape time :  7.501 s
accuracy score : 0.94
classification report : 
             precision    recall  f1-score   support

          0       0.98      0.95      0.96        58
          1       0.95      0.97      0.96        93
          2       0.94      0.92      0.93        53
          3       0.97      0.91      0.94        67
          4       0.96      0.94      0.95        81
          5       0.91      0.92      0.91        64
          6       0.70      0.88      0.78        34
          7       0.99      0.95      0.97        84
          8       0.97      0.97      0.97        66

avg / total       0.94      0.94      0.94       600

confusion matrix : 
[[55  0  0  0  0  1  2  0  0]
 [ 0 90  0  0  1  1  0  0  1]
 [ 0  0 49  1  0  0  3  0  0]
 [ 0  1  1 61  0  3  0  0  1]
 [ 0  2  0  0 76  0  3  0  0]
 [ 1  0  0  1  1 59  1  1  0]
 [ 0  2  2  0  0  0 30  0  0]
 [ 0  0  0  0  1  1  2 80  0]
 [ 0  0  0  0  0  0  2  0 64]]