In [1]:
from numpy import *
from PIL import *
import pickle
from pylab import *
import os
from scipy.misc import *
from matplotlib.pyplot import *

In [2]:
# Raw MNIST training images (IDX3 format), downloaded from
# http://yann.lecun.com/exdb/mnist/
# The whole file is read as unsigned bytes.  ndarray.tolist() converts it to a
# list of plain Python ints in one pass; list(ndarray) would instead wrap every
# byte in its own numpy uint8 scalar object, which costs far more time and
# memory and leaves the later header arithmetic operating on uint8 values.
train_file = fromfile('handwriting/train-images.idx3-ubyte', uint8, -1).tolist()

In [3]:
# Sanity check: show the very first byte of the image file as a plain int.
first_byte = int(train_file[0])
print(first_byte)


0

In [4]:
# Decode the three 32-bit big-endian header fields that follow the 4-byte
# magic number of an IDX3 image file:
#   bytes  4-7  : number of images
#   bytes  8-11 : number of rows    (image height)
#   bytes 12-15 : number of columns (image width)
# Each byte is cast to int before shifting so the arithmetic is done with
# Python integers regardless of whether train_file holds numpy uint8 scalars
# (uint8 * 256**3 relies on legacy numpy promotion and can overflow).
nbr_img, height, width = (
    sum(int(b) << shift
        for b, shift in zip(train_file[off:off + 4], (24, 16, 8, 0)))
    for off in (4, 8, 12)
)
# Same "count width height" output as before, in a form that is valid on both
# Python 2 and Python 3.
print("%d %d %d" % (nbr_img, width, height))


60000 28 28

In [5]:
# Slice the pixel stream that follows the 16-byte header into one flat list of
# height*width values per image; features[k] holds the pixels of image k.
header_len = 16
siz = height * width
features = [
    train_file[header_len + k * siz:header_len + (k + 1) * siz]
    for k in range(nbr_img)
]

In [6]:
# Training labels: the code below reads a 32-bit big-endian item count from
# bytes 4-7 and then takes one label byte per item starting at byte 8.
# tolist() gives plain Python ints (cheaper than list(ndarray), which boxes
# every byte as a numpy scalar).
label_file = fromfile('handwriting/train-labels.idx1-ubyte', uint8, -1).tolist()
# Cast each byte to int before shifting so the count is computed with Python
# integers and never in uint8.
nbr_labels = sum(int(b) << shift
                 for b, shift in zip(label_file[4:8], (24, 16, 8, 0)))
labels = label_file[8:8 + nbr_labels]

In [7]:
# MNIST test images (IDX3 format), read as a list of plain Python ints.
# tolist() avoids creating one numpy uint8 scalar object per byte.
test_file = fromfile('handwriting/t10k-images.idx3-ubyte', uint8, -1).tolist()

# Header fields are 32-bit big-endian integers after the 4-byte magic number:
#   bytes  4-7  : number of images
#   bytes  8-11 : number of rows    (image height)
#   bytes 12-15 : number of columns (image width)
# Bytes are cast to int before shifting so no arithmetic happens in uint8.
# NOTE: this overwrites the width/height/siz values computed for the training
# set; both files are expected to describe images of the same size.
nbr_test_img, height, width = (
    sum(int(b) << shift
        for b, shift in zip(test_file[off:off + 4], (24, 16, 8, 0)))
    for off in (4, 8, 12)
)
print("%d %d %d" % (nbr_test_img, width, height))

# One flat list of height*width pixel values per test image, taken from the
# byte stream that follows the 16-byte header.
siz = height * width
test_features = [
    test_file[16 + k * siz:16 + (k + 1) * siz]
    for k in range(nbr_test_img)
]


10000 28 28

In [8]:
# Test labels: a 32-bit big-endian item count is read from bytes 4-7, followed
# by one label byte per item starting at byte 8.
# tolist() gives plain Python ints instead of one numpy scalar per byte.
test_label_file = fromfile('handwriting/t10k-labels.idx1-ubyte', uint8, -1).tolist()
# Cast each byte to int before shifting so the count is computed with Python
# integers and never in uint8.
nbr_test_labels = sum(int(b) << shift
                      for b, shift in zip(test_label_file[4:8], (24, 16, 8, 0)))
test_labels = test_label_file[8:8 + nbr_test_labels]

In [9]:
from svmutil import *

In [10]:
import time

In [11]:
# Train an SVM on the first nbr_test training samples and print the elapsed
# wall-clock time in seconds.
# NOTE: despite its name, nbr_test is the number of *training* samples used;
# the name is kept because the following cell reuses it.
nbr_test = 30000

start = time.time()
train_y, train_x = labels[:nbr_test], features[:nbr_test]
prob = svm_problem(train_y, train_x)
# '-t 0' selects kernel type 0, which LIBSVM documents as the linear kernel.
param = svm_parameter('-t 0')
m = svm_train(prob, param)
end = time.time()
print(end - start)


159.854454041

In [12]:
# Score the model on the very samples it was trained on, i.e. this reports
# training accuracy, not generalisation performance.
train_y, train_x = labels[:nbr_test], features[:nbr_test]
res = svm_predict(train_y, train_x, m)


Accuracy = 100% (30000/30000) (classification)

In [14]:
# Score the model on the first nbr_test2 samples of the separate test set.
nbr_test2 = 10000
held_out_y = test_labels[:nbr_test2]
held_out_x = test_features[:nbr_test2]
res = svm_predict(held_out_y, held_out_x, m)


Accuracy = 91.43% (9143/10000) (classification)

In [15]:
# This test accuracy (91.43%) is far lower than the top results listed on the
# MNIST page, and well below the 100% training accuracy measured above.

In [ ]: