In [ ]:
# Load the EMNIST "letters" dataset. Download it in MATLAB format from:
# https://www.nist.gov/itl/iad/image-group/emnist-dataset
import scipy.io

mat = scipy.io.loadmat('../Dataset/emnist-letters.mat')
d = mat['dataset']

# The loaded MATLAB struct is nested: each level is unwrapped with [0, 0]
# before a field can be read by name.
train_split = d[0, 0]['train'][0, 0]
test_split = d[0, 0]['test'][0, 0]

images, labels = train_split['images'], train_split['labels']
test_images, test_labels = test_split['images'], test_split['labels']
print('done')
In [ ]:
import numpy as np  # numerical array library

n = 10000  # number of training samples to keep

# Keep only the first n training samples; the test set is used in full.
train_pixels = images[:n]
train_targets = labels[:n]

# Pixels become float64 divided by 255 (presumably raw values are 0..255,
# giving a [0, 1] range -- confirm against the dataset). Labels become a
# flat int32 vector.
images_2 = np.asarray(train_pixels, dtype=np.float64) / 255
labels_2 = np.ravel(np.asarray(train_targets, dtype=np.int32))
test_images_2 = np.asarray(test_images, dtype=np.float64) / 255
test_labels_2 = np.ravel(np.asarray(test_labels, dtype=np.int32))
print('done')
In [ ]:
# Fit a support vector classifier on the training subset and report how long
# it took (in seconds).
import time  # wall-clock timestamps for the timing below

start = time.time()
# NOTE: the sklearn import and the estimator construction sit inside the timed
# region, so the printed duration includes them as well as the fit itself.
from sklearn import svm
clf = svm.SVC().fit(images_2, labels_2)  # fit() returns the fitted estimator
end = time.time()
print(end - start)
print('done')
In [ ]:
# Classify the first 10000 test images and report the elapsed time in seconds.
start = time.time()
test_subset = test_images_2[:10000]
answers = clf.predict(test_subset)
end = time.time()
print(end-start)
print('done')
In [ ]:
# Accuracy (in percent) of the predictions against the true test labels.
#
# `answers` may cover only a prefix of the test set, so both vectors are
# trimmed to their common length explicitly instead of relying on zip()
# silently truncating the longer one.
n_scored = min(len(test_labels_2), len(answers))

# One vectorised comparison replaces a Python-level loop over NumPy scalars.
s = test_labels_2[:n_scored] == answers[:n_scored]

# The 100.0 float literal forces true division, so the percentage is not
# truncated to an integer when this runs under Python 2.
100.0 * np.count_nonzero(s) / n_scored
In [ ]:
# Convert the trained scikit-learn classifier to Core ML, attach metadata,
# and write the model file to disk.
import coremltools

# Feature names are bound once so the conversion call and the description
# entries below cannot drift apart.
input_name = "imageAlphaValues"
output_name = 'letterIndex'

coreml_model = coremltools.converters.sklearn.convert(
    clf,
    input_name,
    output_feature_names=output_name,
)

# Model-level metadata.
coreml_model.author = 'Kate Bonnen and Conrad Stoll'
coreml_model.license = 'MIT'
coreml_model.short_description = "Recognize the hand-drawn letter from an input image."

# Per-feature descriptions.
coreml_model.input_description[input_name] = 'The input image alpha values from top down left to right.'
coreml_model.output_description[output_name] = 'Most likely letter index, ranging from 1 to 26.'

coreml_model.save('../Models/letters_svm' + '.mlmodel')
In [ ]: