In [4]:
from numpy import *
from PIL import *
import pickle
from pylab import *
import os
from scipy.misc import *
from matplotlib.pyplot import *
In [13]:
import cv2
import imtools
# Re-execute the already imported imtools module so that edits made to it
# since the first import take effect without restarting the kernel.
imtools = reload(imtools)
In [77]:
def compute_feature(im, size=30, border=3, log_scale=False):
    """ Returns a feature vector for an
    ocr image patch.

    The patch is resized to size x size pixels, border pixels are trimmed
    from every side, and the Hu moments of what remains are returned as a
    flat array.

    im        -- image patch as an array.
    size      -- edge length (pixels) the patch is resized to.
    border    -- number of pixels removed from each side after resizing;
                 0 keeps the whole resized patch.
    log_scale -- if True, each moment h is replaced by
                 -sign(h) * log10(|h|). Raw Hu moments differ by many
                 orders of magnitude, so this puts them on a comparable
                 scale. Off by default so existing results are unchanged.
    """
    # resize and remove border
    norm_im = imresize(im, (size, size))
    if border > 0:
        # guarded because a [0:-0] slice would yield an empty array
        norm_im = norm_im[border:-border, border:-border]

    # flatten() returns a copy, so the in-place scaling below is safe
    hu = cv2.HuMoments(cv2.moments(norm_im)).flatten()

    if log_scale:
        # zero moments are left at 0 to avoid log10(0) = -inf
        nonzero = hu != 0
        hu[nonzero] = -sign(hu[nonzero]) * log10(abs(hu[nonzero]))

    return hu
In [78]:
def load_ocr_data(path):
    """ Return labels and ocr features for all images in path.

    Every file in path whose name ends in '.jpg' is opened, converted to
    grayscale and passed to compute_feature. The label of an image is the
    integer given by the first character of its file name.

    Returns (features, labels): an array with one feature vector per image
    and a list of integer labels in the same order.
    Raises ValueError if a .jpg file name does not start with a digit.
    """
    # Imported locally: `from PIL import *` does not reliably expose Image
    # (it only does so if another module has already imported PIL.Image).
    from PIL import Image

    # create list of all files ending in .jpg; sorted because os.listdir
    # returns entries in arbitrary order and the sample order should be
    # reproducible between runs
    imnames = sorted(f for f in os.listdir(path) if f.endswith('.jpg'))

    # take the label from the bare file name rather than splitting the
    # joined path on '/', which is not the separator on every platform
    labels = [int(f[0]) for f in imnames]

    features = []
    for f in imnames:
        im = array(Image.open(os.path.join(path, f)).convert('L'))
        features.append(compute_feature(im))
    return array(features), labels
In [79]:
from svmutil import *
In [126]:
# Load OCR features and labels for the training and the testing directory.
features, labels = load_ocr_data('sudoku_images/ocr_data/training/')
test_features, test_labels = load_ocr_data('sudoku_images/ocr_data/testing/')
# Idea (not applied): normalise each feature dimension to unit length
# features = array([f/linalg.norm(f) for f in features.T if linalg.norm(f)>0]).T
# Convert the feature arrays to lists of lists for svm_problem/svm_predict.
# A list comprehension is used instead of map() because map() returns a
# lazy iterator on Python 3 rather than a list.
features = [list(f) for f in features]
test_features = [list(f) for f in test_features]
In [144]:
# Train an SVM on the training labels and features.
prob = svm_problem(labels, features)
# '-t 0' is the LIBSVM option string selecting a linear kernel.
param = svm_parameter('-t 0')
# m is the trained model used by the prediction cells below.
m = svm_train(prob, param)
In [145]:
# Predict on the same data the model was trained on (training accuracy).
res = svm_predict(labels, features, m)
In [146]:
# Predict on the separately loaded test data; this overwrites the res
# produced by the training-set prediction.
res = svm_predict(test_labels, test_features, m)
In [ ]:
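# The classification accuracy is not very good. TODO: find ways to improve it
# (e.g. scaling the features or trying a different kernel).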