In [1]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image as pil
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml  # fetch_mldata has been removed from scikit-learn
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
In [2]:
mnist = fetch_openml('mnist_784', version=1, as_frame=False)  # fetch_mldata('MNIST original') no longer works; the same data is hosted on OpenML
print("total samples : {}".format(len(mnist.data)))
print("total classes : {}".format(len(np.unique(mnist.target))))
In [3]:
X_train, X_test, y_train, y_test = train_test_split(mnist.data, mnist.target, test_size=0.8, random_state=0)
print("Training Data : {}".format(len(X_train)))
print("Test Data : {}".format(len(X_test)))
In [4]:
def image(data):
    # each MNIST sample is a flattened 784-vector; reshape it back to a 28x28 image
    return data.reshape((28, 28))

def display_datas(N, M=10):
    # show the first N*M training images in an N x M grid
    fig = plt.figure(figsize=(10, N))
    plt.subplots_adjust()
    for i in range(N):
        for j in range(M):
            k = i * M + j
            ax = fig.add_subplot(N, M, k + 1)
            ax.imshow(image(X_train[k]), cmap=plt.cm.bone)
            ax.xaxis.set_ticks([])
            ax.yaxis.set_ticks([])
            plt.title(int(y_train[k]))  # add the target on the title
    plt.tight_layout()
    plt.show()
In [6]:
display_datas(5)
In [5]:
%time svc_model = SVC(kernel='linear').fit(X_train, y_train)
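A sketch of an optional preprocessing step the cell above skips: SVMs are sensitive to feature scale, and dividing the raw 0-255 pixel values by 255 often shortens training time. The scaled variable names below are illustrative and not part of the original run.
In [ ]:
# Sketch: fit the same linear-kernel SVM on pixels scaled to [0, 1]
# (assumes X_train / X_test from the split above are NumPy arrays).
X_train_scaled = X_train / 255.0
X_test_scaled = X_test / 255.0
svc_scaled = SVC(kernel='linear').fit(X_train_scaled, y_train)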
In [6]:
%time y_pred = svc_model.predict(X_test[:10000])
print(accuracy_score(y_test[:10000], y_pred))
In [7]:
accuracy_score(y_test[:10000], y_pred)
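A single accuracy number hides which digits get confused with which. As a hedged follow-up to the evaluation above (not in the original notebook), a confusion matrix and classification report break the errors down per digit.
In [ ]:
# Sketch: per-digit error breakdown for the 10,000 predictions above.
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test[:10000], y_pred))       # rows = true digit, columns = predicted digit
print(classification_report(y_test[:10000], y_pred))  # precision / recall / F1 per digit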
In [8]:
def display_datas2(k):
    # show sample k from the full dataset and compare its label with the model's prediction
    fig = plt.figure(figsize=(10, 1))
    plt.subplots_adjust()
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(image(mnist.data[k]), cmap=plt.cm.bone)
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])
    plt.tight_layout()
    plt.show()
    result = svc_model.predict(mnist.data[k].reshape(1, -1))
    result_str = "data : {} / predict : {}".format(int(mnist.target[k]), int(result[0]))
    print(result_str)
In [11]:
display_datas2(60992)
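display_datas2 inspects one hand-picked index. A small hedged extension using the predictions already computed for the first 10,000 test samples: list the misclassified positions and display the first one (the wrong and i names below are new, illustrative helpers).
In [ ]:
# Sketch: find test samples the model got wrong and show the first one.
wrong = np.where(y_test[:10000] != y_pred)[0]  # positions within X_test of the misclassified samples
print("misclassified : {} of 10000".format(len(wrong)))
i = wrong[0]
plt.imshow(image(X_test[i]), cmap=plt.cm.bone)
plt.title("data : {} / predict : {}".format(int(y_test[i]), int(y_pred[i])))
plt.xticks([]); plt.yticks([])
plt.show()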
In [ ]: