In [4]:
from numpy import *
from PIL import *
import pickle
from pylab import *
import os
from scipy.misc import *
from matplotlib.pyplot import *

In [13]:
import cv2
import imtools
# Reload imtools so that changes made to its source since the first import are
# picked up without restarting the kernel.  `reload` is a builtin only on
# Python 2; the returned module object is rebound to the same name.
imtools = reload(imtools)

In [77]:
def compute_feature(im, log_scale=False):
    """ Returns a feature vector for an
    ocr image patch.

    The patch is resized to 30x30, a 3-pixel border is cropped from every
    side (leaving 24x24) and the Hu moments of the cropped patch are
    returned as a flat array.

    Parameters
    ----------
    im : array
        Image patch, handed unchanged to `imresize`.
    log_scale : bool, optional
        When True, each moment h is replaced by -sign(h) * log10(|h|)
        (exact zeros stay 0).  Hu moments typically differ by many orders
        of magnitude, so this brings them onto a comparable scale.  The
        default (False) returns the raw moments, as before.

    Returns
    -------
    1-D array of Hu moments (OpenCV's HuMoments yields seven values).
    """

    # resize and remove border
    norm_im = imresize(im, (30, 30))
    norm_im = norm_im[3:-3, 3:-3]

    # spatial/central moments -> Hu invariants, flattened from a column vector
    hu = cv2.HuMoments(cv2.moments(norm_im)).flatten()

    if log_scale:
        import numpy as np

        # log10(0) is -inf, so only the non-zero moments are transformed
        nonzero = hu != 0
        scaled = np.zeros_like(hu)
        scaled[nonzero] = -np.sign(hu[nonzero]) * np.log10(np.abs(hu[nonzero]))
        hu = scaled

    return hu

In [78]:
def load_ocr_data(path):
    """ Return labels and ocr features for all images in path.

    Every file in `path` whose name ends in '.jpg' is opened, converted to
    8-bit greyscale ('L') and passed to compute_feature.  The label of an
    image is the first character of its file name, parsed as an integer.

    Parameters
    ----------
    path : str
        Directory containing the .jpg patches.

    Returns
    -------
    (features, labels) : features is array(list of feature vectors) and
    labels is a list of ints, both in sorted file-name order.

    Raises
    ------
    ValueError
        If a .jpg file name does not start with a digit.
    """
    # Imported here so the function does not rely on `Image` having reached
    # the namespace through a star import.
    from PIL import Image

    # create list of all files ending in .jpg; sorted because os.listdir
    # returns entries in arbitrary order
    imlist = [os.path.join(path, f)
              for f in sorted(os.listdir(path)) if f.endswith('.jpg')]

    # basename() strips the directory part whatever separator
    # os.path.join used, so the label parse is not tied to '/'
    labels = [int(os.path.basename(imfile)[0]) for imfile in imlist]

    features = []
    for imname in imlist:
        im = array(Image.open(imname).convert('L'))
        features.append(compute_feature(im))
    return array(features), labels

In [79]:
from svmutil import *

In [126]:
# Load the training and testing sets (feature array + integer labels each).
features, labels = load_ocr_data('sudoku_images/ocr_data/training/')
test_features, test_labels = load_ocr_data('sudoku_images/ocr_data/testing/')

# Disabled experiment: scale every feature column to unit L2 norm, dropping
# all-zero columns.
# features = array([f/linalg.norm(f) for f in features.T if linalg.norm(f)>0]).T

# Convert each feature row to a plain list before handing it to svmutil.
# A list comprehension is used instead of map() so the result is a real,
# re-usable list on Python 3 as well (map() would give a one-shot iterator
# that is exhausted after its first use).
features = [list(f) for f in features]
test_features = [list(f) for f in test_features]

In [144]:
# Bundle the training labels and feature lists into a libsvm problem object.
prob = svm_problem(labels, features)
# '-t 0' is libsvm's kernel-type option set to 0, i.e. the linear kernel;
# every other option keeps its default.
param = svm_parameter('-t 0')
# Train the SVM; `m` is the model consumed by the svm_predict cells below.
m = svm_train(prob, param)

In [145]:
# Predict on the same data the model was trained on (training accuracy);
# svm_predict prints the "Accuracy = ..." line shown in the cell output.
res = svm_predict(labels, features, m)


Accuracy = 30.1632% (425/1409) (classification)

In [146]:
# Predict on the held-out testing set; this rebinds `res`, replacing the
# result of the previous prediction cell.
res = svm_predict(test_labels, test_features, m)


Accuracy = 22.3671% (223/997) (classification)

In [ ]:
# Not very good: about 30% accuracy on the training set and 22% on the test set. Any way to improve?