In [1]:
import timeit

from sklearn import datasets, model_selection, svm, metrics
In [2]:
# `datasets.fetch_mldata` no longer exists in current scikit-learn (and mldata.org is
# offline), so load MNIST from OpenML instead.
# as_frame=False keeps `.data` / `.target` as NumPy arrays rather than pandas objects.
# NOTE(review): OpenML serves the labels as strings ('0'..'9') rather than floats;
# the classifiers and metrics used in this notebook accept either.
mnist = datasets.fetch_openml(
    'mnist_784',
    version=1,
    data_home='data/src/download/',
    as_frame=False,
)
In [3]:
# Show what kind of container the loader returned and which fields it exposes
# (one item per line).
print(type(mnist), mnist.keys(), sep='\n')
In [4]:
# Features are divided by 255; labels are taken as-is.
# NOTE(review): presumably the raw features are 0-255 pixel intensities, so this
# maps them into [0, 1] -- confirm against the dataset description.
mnist_data, mnist_label = mnist.data / 255, mnist.target
In [5]:
# Sanity check: feature-matrix shape on the first line, label-vector shape on the second.
print(mnist_data.shape, mnist_label.shape, sep='\n')
In [6]:
# Baseline: fit a default SVC on a small random subset and print its accuracy on a
# disjoint test subset.
train_size = 500
test_size = 100
# A fixed random_state makes the sampled subset -- and therefore the printed
# accuracy -- identical on every Restart & Run All.
data_train, data_test, label_train, label_test = model_selection.train_test_split(
    mnist_data,
    mnist_label,
    test_size=test_size,
    train_size=train_size,
    random_state=0,
)
clf = svm.SVC()
clf.fit(data_train, label_train)
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print(ac_score)
In [7]:
# Shared setup for the timing cells that follow.
# `num` is how many times fit() is repeated per measurement; the reported time is
# the total divided by `num`.
num = 10
train_size = 500
test_size = 100
# Seeded split so every timed classifier sees the same, reproducible subset.
data_train, data_test, label_train, label_test = model_selection.train_test_split(
    mnist_data,
    mnist_label,
    test_size=test_size,
    train_size=train_size,
    random_state=0,
)
In [8]:
# Time SVC training: fit() is run `num` times and the mean seconds per fit is printed.
clf = svm.SVC()
mean_fit_seconds = timeit.timeit(
    stmt=lambda: clf.fit(data_train, label_train),
    number=num,
) / num
print(mean_fit_seconds)

# Score the classifier left behind by the last timed fit on the test split.
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(y_true=label_test, y_pred=pre)
print(ac_score)
In [9]:
# Same measurement as for SVC, but with LinearSVC.
# LinearSVC's solver is randomised, so pin random_state to keep the printed
# accuracy reproducible across runs.
clf = svm.LinearSVC(random_state=0)
# Mean seconds per fit over `num` repetitions.
print(timeit.timeit(lambda: clf.fit(data_train, label_train), number=num) / num)
# Score the classifier left behind by the last timed fit on the test split.
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print(ac_score)
In [10]:
# Repeat the LinearSVC measurement on a larger subset (10,000 train / 2,000 test).
# Note: this overwrites train_size, test_size and the data_*/label_* splits used above.
train_size = 10000
test_size = 2000
# Seed both the split and the classifier so the timing subset, the accuracy and the
# confusion matrix computed from `pre` are reproducible.
data_train, data_test, label_train, label_test = model_selection.train_test_split(
    mnist_data,
    mnist_label,
    test_size=test_size,
    train_size=train_size,
    random_state=0,
)
clf = svm.LinearSVC(random_state=0)
# fit() is executed `num` times here; the printed value is the mean seconds per fit.
print(timeit.timeit(lambda: clf.fit(data_train, label_train), number=num) / num)
pre = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, pre)
print(ac_score)
In [11]:
# Confusion matrix for the most recent predictions `pre` against `label_test`
# (first argument = true labels, second = predicted labels).
co_mat = metrics.confusion_matrix(
    y_true=label_test,
    y_pred=pre,
)
print(co_mat)