In [ ]:
# Load the EMNIST "letters" data. Download it in MATLAB format from:
# https://www.nist.gov/itl/iad/image-group/emnist-dataset
import scipy.io
mat = scipy.io.loadmat('../Dataset/emnist-letters.mat')
d = mat['dataset']
# Each nested struct level is indexed with [0,0] before its field is read.
images = d[0,0]['train'][0,0]['images']
labels = d[0,0]['train'][0,0]['labels']
test_images = d[0,0]['test'][0,0]['images']
test_labels = d[0,0]['test'][0,0]['labels']

# Choose which group of 13 letter labels this model learns.
#   half -- the label values to keep when filtering the data
#   save -- suffix identifying the group in the saved model's filename
which_half = 2
if which_half == 1:
    half = [1,2,3,6,7,9,10,13,16,19,21,23,24]
    save = '1'
elif which_half == 2:
    half = [4,5,8,11,12,14,15,17,18,20,22,25,26]
    save = '2'
else:
    # Fail here instead of with a NameError on `half`/`save` in a later cell.
    raise ValueError('which_half must be 1 or 2, got %r' % (which_half,))

In [ ]:
import numpy as np  # number library
n = 3000            # size of training set: only the first n training rows are used

# Format the images and labels: image values are converted to float64 and
# scaled by 1/255 (presumably 0-255 pixel intensities -- confirm against the
# dataset); labels become flat 1-D int32 arrays.
images_2 = np.asarray(images[:n], dtype=np.float64) / 255
labels_2 = np.asarray(labels[:n], dtype=np.int32).ravel()
test_images_2 = np.asarray(test_images, dtype=np.float64) / 255
test_labels_2 = np.asarray(test_labels, dtype=np.int32).ravel()

# Pick out the letters we want. np.isin builds the boolean row mask in one
# vectorized call; True marks rows whose label is listed in `half`.
ind = np.isin(labels_2, half)
labels_half = labels_2[ind]
images_half = images_2[ind]

# Test images: same selection, applied to the whole test set (no n-row cap).
test_ind = np.isin(test_labels_2, half)
test_labels_half = test_labels_2[test_ind]
test_images_half = test_images_2[test_ind]

In [ ]:
# Learn the model and report how long the fit took.

import time   # for timing the model
# Imported before the timer starts so the reported time covers only the
# fit itself, not the one-off cost of loading scikit-learn.
from sklearn import svm

start = time.time()

# Actually learning the model: a support vector classifier with the
# library's default settings, fitted on the selected training rows.
clf = svm.SVC()
clf.fit(images_half, labels_half)

end = time.time()
print(end - start)   # elapsed wall-clock seconds
print('done')

In [ ]:
# Time the classifier on the test data.
# Uses `time` and the fitted `clf` from the training cell.

# Only the first 10000 selected test rows are scored.
test_subset = test_images_half[:10000]

start = time.time()
answers = clf.predict(test_subset)
end = time.time()

elapsed = end - start   # wall-clock seconds spent in predict
print(elapsed)
print('done')

In [ ]:
# Percentage of predictions that match the true labels.
# `answers` can be shorter than `test_labels_half` (prediction ran on a
# leading slice of the test rows), so both are trimmed to the common length
# explicitly to keep labels and predictions aligned row for row.
n_scored = min(len(test_labels_half), len(answers))
s = test_labels_half[:n_scored] == answers[:n_scored]   # element-wise hit/miss mask
# 100.0 keeps the division in floating point on every Python version.
100.0 * s.sum() / len(s)

In [ ]:
import coremltools

# Convert the fitted scikit-learn classifier into a Core ML model with one
# input feature ("imageAlphaValues") and one output feature ('letterIndex').
coreml_model = coremltools.converters.sklearn.convert(
    clf,
    "imageAlphaValues",
    output_feature_names='letterIndex',
)

# Model metadata.
coreml_model.short_description = "Recognize the hand-drawn letter from an input image."
coreml_model.author = 'Kate Bonnen and Conrad Stoll'
coreml_model.license = 'MIT'

# Descriptions attached to the input and output features.
coreml_model.input_description['imageAlphaValues'] = 'The input image alpha values from top down left to right.'
coreml_model.output_description['letterIndex'] = 'Most likely letter index, ranging from 1 to 26.'

# The filename carries the `save` suffix chosen with the letter group,
# so each group's model is written to its own file.
model_path = '../Models/letters_svm_half_'+ save +'.mlmodel'
coreml_model.save(model_path)

In [ ]: