In [1]:
import pandas as pd
import numpy as np
from numpy import *
from sklearn import cross_validation
from sklearn.svm import SVC

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Column 0 of train.csv is taken as the label and every remaining column as a
# feature; all columns of test.csv are taken as features.
# .iloc replaces the deprecated (and since removed) .ix indexer; for these
# purely positional slices it selects the same data.
trainX = train.iloc[:, 1:].values.astype('float32')
trainY = train.iloc[:, 0].values.astype('int32')

testX = test.iloc[:, :].values.astype('float32')

# Rescale grayscale to [-1, 1] (assumes the raw pixel values span 0..255)
trainX = trainX / 255.0 * 2 - 1
testX = testX / 255.0 * 2 - 1

# Shuffle the input; indexing both arrays with the same permutation keeps
# every feature row aligned with its label.
shuffle = np.random.permutation(trainX.shape[0])
trainX, trainY = trainX[shuffle], trainY[shuffle]

# Initialise the model, the best model parameters are reported at
# http://peekaboo-vision.blogspot.co.uk/2010/09/mnist-for-ever.html
clf = SVC(kernel="rbf", C=2.8, gamma=.0073, cache_size=3000)

# Train and validate the model with 2-fold cross validation (cv=2 yields one
# score per fold, i.e. two scores)
scores = cross_validation.cross_val_score(clf, trainX, trainY, cv=2)

# Parenthesised single-argument form prints the same text on Python 2 and 3
print(scores)


[ 0.97924106  0.97571082]

In [13]:
# Fit a fresh RBF-kernel SVM on every training row, then label the test set.
# fit() returns the estimator itself, so the prediction can be chained on it.
clf = SVC(kernel="rbf", C=2.8, gamma=.0073, cache_size=3000)
preds = clf.fit(trainX, trainY).predict(testX)

# Build the two-column submission table (ids are 1-based) and write it out
# without the DataFrame index.
image_ids = range(1, len(preds) + 1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": preds})
submission.to_csv("submission.csv", index=False)

In [15]:
# Display the first two rows as a quick check of the submission layout
submission.head(2)


Out[15]:
ImageId Label
0 1 2
1 2 0

In [16]:
# 98th/660, LB : 0.98386

In [17]:
# Let's try Adrian Rosebrock's code
# http://www.pyimagesearch.com/2014/06/23/applying-deep-learning-rbm-mnist-using-python/

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from time import time

In [20]:
def scale(X, eps = 0.001):
    """Min-max scale every column of X into the range [0, 1).

    Each column is shifted by its minimum and divided by its value range
    (max - min) plus ``eps``. The previous form divided by ``max + eps``,
    which only lands in [0, 1] when the column minimum is 0; for other
    columns the result was compressed or, for negative data, left the
    range altogether. Columns whose minimum is 0 scale exactly as before.

    Parameters
    ----------
    X : array-like, reduced along axis 0 (one statistic per column)
    eps : float
        Small constant added to the denominator so that constant columns
        (zero range) do not divide by zero; they scale to 0.

    Returns
    -------
    Array of the same shape as X with the scaled values.
    """
    col_min = np.min(X, axis = 0)
    col_range = np.max(X, axis = 0) - col_min
    return (X - col_min) / (col_range + eps)

t0 = time()

# Rebuild the raw arrays from the DataFrames so this cell starts from the
# unscaled, unshuffled values rather than whatever trainX/testX held before.
# .iloc replaces the deprecated (and since removed) .ix indexer; for these
# purely positional slices it selects the same data.
trainX = train.iloc[:, 1:].values.astype('float32')
trainY = train.iloc[:, 0].values.astype('int32')

testX = test.iloc[:, :].values.astype('float32')

# NOTE(review): train and test are each scaled with their own column
# statistics, so the same raw value can map to different feature values in
# the two sets -- confirm this is intended.
trainX = scale(trainX)
testX = scale(testX)

# initialize the RBM + Logistic Regression classifier
rbm = BernoulliRBM(n_components = 200, n_iter = 40, learning_rate = 0.01,  verbose = True)
logistic = LogisticRegression(C = 1.0)

# chain the RBM into the logistic regression and fit on the full training set
clf = Pipeline([("rbm", rbm), ("logistic", logistic)])
clf.fit(trainX, trainY)

preds = clf.predict(testX)

# One pre-formatted string so the printed line is the same whether print is
# the Python 2 statement or the Python 3 function
print("escape time :  %s s" % round(time() - t0, 3))

# Create your submission file
submission = pd.DataFrame({"ImageId": range(1,len(preds)+1), "Label": preds})
submission.to_csv("submission.csv", index=False)

# LB : 0.95600


[BernoulliRBM] Iteration 1, pseudo-likelihood = -104.57, time = 19.14s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -91.22, time = 22.66s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -84.36, time = 22.84s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -80.95, time = 22.90s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -79.42, time = 22.74s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -75.74, time = 22.53s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -73.40, time = 22.70s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -72.00, time = 22.91s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -73.09, time = 22.60s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -70.62, time = 22.73s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -71.48, time = 22.77s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -70.25, time = 22.44s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -71.08, time = 22.69s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -67.20, time = 22.80s
[BernoulliRBM] Iteration 15, pseudo-likelihood = -66.58, time = 22.92s
[BernoulliRBM] Iteration 16, pseudo-likelihood = -67.37, time = 22.80s
[BernoulliRBM] Iteration 17, pseudo-likelihood = -70.06, time = 22.70s
[BernoulliRBM] Iteration 18, pseudo-likelihood = -69.06, time = 22.80s
[BernoulliRBM] Iteration 19, pseudo-likelihood = -65.31, time = 22.93s
[BernoulliRBM] Iteration 20, pseudo-likelihood = -66.57, time = 22.78s
[BernoulliRBM] Iteration 21, pseudo-likelihood = -65.43, time = 22.87s
[BernoulliRBM] Iteration 22, pseudo-likelihood = -67.43, time = 22.68s
[BernoulliRBM] Iteration 23, pseudo-likelihood = -66.14, time = 22.57s
[BernoulliRBM] Iteration 24, pseudo-likelihood = -64.17, time = 22.78s
[BernoulliRBM] Iteration 25, pseudo-likelihood = -66.46, time = 22.78s
[BernoulliRBM] Iteration 26, pseudo-likelihood = -64.85, time = 22.68s
[BernoulliRBM] Iteration 27, pseudo-likelihood = -66.71, time = 22.83s
[BernoulliRBM] Iteration 28, pseudo-likelihood = -66.06, time = 22.99s
[BernoulliRBM] Iteration 29, pseudo-likelihood = -67.01, time = 22.94s
[BernoulliRBM] Iteration 30, pseudo-likelihood = -64.51, time = 22.64s
[BernoulliRBM] Iteration 31, pseudo-likelihood = -64.07, time = 22.82s
[BernoulliRBM] Iteration 32, pseudo-likelihood = -64.12, time = 22.88s
[BernoulliRBM] Iteration 33, pseudo-likelihood = -65.64, time = 22.85s
[BernoulliRBM] Iteration 34, pseudo-likelihood = -64.31, time = 22.70s
[BernoulliRBM] Iteration 35, pseudo-likelihood = -63.17, time = 22.85s
[BernoulliRBM] Iteration 36, pseudo-likelihood = -62.69, time = 22.81s
[BernoulliRBM] Iteration 37, pseudo-likelihood = -65.18, time = 22.80s
[BernoulliRBM] Iteration 38, pseudo-likelihood = -64.81, time = 22.90s
[BernoulliRBM] Iteration 39, pseudo-likelihood = -64.90, time = 22.93s
[BernoulliRBM] Iteration 40, pseudo-likelihood = -63.69, time = 22.91s
escape time :  968.028 s