In [15]:
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
mnist = fetch_mldata('MNIST original')
mnist


Out[2]:
{'COL_NAMES': ['label', 'data'],
 'DESCR': 'mldata.org dataset: mnist-original',
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'target': array([ 0.,  0.,  0., ...,  9.,  9.,  9.])}

In [12]:
X, y = mnist.data, mnist.target
X.shape, y.shape


Out[12]:
((70000, 784), (70000,))

In [24]:
X_train, X_test, y_train, y_test = X[:63000], X[63000:], y[:63000], y[63000:]
shuffle_index = np.random.permutation(63000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
X_train.shape, X_test.shape, y_train.shape, y_test.shape


Out[24]:
((63000, 784), (7000, 784), (63000,), (7000,))

Stochastic Gradient Descent


In [28]:
sgd_clf = SGDClassifier()
sgd_clf.fit(X_train, y_train)
y_pred_sgd = sgd_clf.predict(X_test)
acc_sgd = accuracy_score(y_test, y_pred_sgd)
print(acc_sgd)


0.854

Support Vector Machine


In [27]:
svm_clf = LinearSVC()
svm_clf.fit(X_train, y_train)
y_pred_svc = svm_clf.predict(X_test)
acc_svc = accuracy_score(y_test, y_pred_svc)
print(acc_svc)


0.843714285714

Nearest Neighbors


In [30]:
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
y_pred_knn = knn_clf.predict(X_test)
acc_knn = accuracy_score(y_test, y_pred_knn)
print(acc_knn)


0.960857142857

In [ ]: