In [3]:
# adapted from http://stackoverflow.com/a/21675241
import numpy
from shogun.Features import *
from shogun.Kernel import *
from shogun.Classifier import *
from shogun.Evaluation import *
from modshogun import StringCharFeatures, RAWBYTE
# from shogun.Kernel import SSKStringKernel
from shogun.Kernel import StringSubsequenceKernel
# Training set: 'cat' and 'car' carry label +1, 'doom' and 'boom' carry label -1.
strings = ['cat', 'doom', 'car', 'boom']
train_labels = numpy.array([1, -1, 1, -1])
features = StringCharFeatures(strings, RAWBYTE)

# Held-out set, labelled the same way: 'bat' -> +1, 'soon' -> -1.
test = ['bat', 'soon']
test_labels = numpy.array([1, -1])
test_features = StringCharFeatures(test, RAWBYTE)
In [4]:
# Train a binary SVM on a string-subsequence kernel over the training words,
# then classify the held-out words.

# Kernel arguments after the two feature sets: 1 is n and 0.5 is lambda,
# as described in Lodhi 2002.
# (Earlier attempt under a different class name, kept for reference:
#  sk = SSKStringKernel(features, features, 1, 0.5))
sk = StringSubsequenceKernel(features, features, 1, 0.5)

# Train the Support Vector Machine
labels = BinaryLabels(train_labels)
# C is handed to LibSVM as its first argument
# (presumably the regularisation constant — TODO confirm).
C = 1.0
svm = LibSVM(C, sk, labels)
svm.train()

# Prediction
predicted_labels = svm.apply(test_features).get_labels()
# Single-argument call form: prints the same text as the Python 2 print
# statement and also parses under Python 3.
print(predicted_labels)
In [9]:
# Display the kernel matrix held by `sk` (bare last expression, so the notebook renders it).
# NOTE(review): `sk` was also handed to the SVM in the training cell; confirm whether
# applying the SVM to the test features changes which matrix is shown here.
sk.get_kernel_matrix()
Out[9]:
In [13]:
# Kernel settings for one pair of single-word feature sets.
n = 1             # passed as the kernel's third argument
lamda_weight = 1  # passed as the kernel's fourth argument; later cells reuse this name

# One feature object per word: `s` wraps 'cat', `t` wraps 'bat'.
s, t = (StringCharFeatures([word], RAWBYTE) for word in ('cat', 'bat'))

ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
In [55]:
# Compare 'cat' against 'bat' for several values of n (the kernel's third argument).
# `lamda_weight` is not set here; it must already exist from an earlier cell.
s = StringCharFeatures(['cat'], RAWBYTE)
t = StringCharFeatures(['bat'], RAWBYTE)
# NOTE(review): range(5) starts at n = 0 — confirm that 0 is a meaningful value
# for this kernel.
for n in range(5):
    # The loop body has to be indented; left flush with `for` it raises IndentationError.
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    # Formats as "<n> <matrix>", the same text `print n, matrix` writes under
    # Python 2, while also being valid Python 3.
    print("%s %s" % (n, ssk.get_kernel_matrix()))
In [54]:
# Compare 'cat' against itself for several values of n (the kernel's third argument).
# `lamda_weight` is not set here; it must already exist from an earlier cell.
s = StringCharFeatures(['cat'], RAWBYTE)
# NOTE(review): range(5) starts at n = 0 — confirm that 0 is a meaningful value
# for this kernel.
for n in range(5):
    # The loop body has to be indented; left flush with `for` it raises IndentationError.
    ssk = StringSubsequenceKernel(s, s, n, lamda_weight)
    # Formats as "<n> <matrix>", the same text `print n, matrix` writes under
    # Python 2, while also being valid Python 3.
    print("%s %s" % (n, ssk.get_kernel_matrix()))
In [36]:
# Evaluate a single kernel entry. The two arguments are idx_a and idx_b,
# presumably indices into the left- and right-hand feature sets — TODO confirm.
# NOTE(review): `ssk` is whatever an earlier-run cell last bound; this notebook's
# execution counts are out of order, so re-check after Restart & Run All.
ssk.kernel(0,0) # idx_a, idx_b
Out[36]:
In [39]:
# Display what get_lhs() returns for `ssk`
# (presumably the left-hand feature object the kernel was built with — TODO confirm).
ssk.get_lhs()
Out[39]:
In [41]:
# Display what get_feature_class() returns for `ssk`
# (presumably an identifier for the feature class this kernel works on — TODO confirm).
ssk.get_feature_class()
Out[41]:
In [43]:
# Display what get_feature_type() returns for `ssk`
# (presumably an identifier for the element type of the features — TODO confirm).
ssk.get_feature_type()
Out[43]:
In [47]:
# Display get_kernel_col(0) for `ssk`
# (presumably column 0 of the kernel matrix — TODO confirm).
ssk.get_kernel_col(0)
Out[47]:
In [49]:
# Display get_kernel_row(0) for `ssk`
# (presumably row 0 of the kernel matrix — TODO confirm).
ssk.get_kernel_row(0)
Out[49]:
In [50]:
# Display the kernel matrix of `ssk` (bare last expression, so the notebook renders it).
# NOTE(review): which kernel `ssk` refers to depends on the cell that ran last before this one.
ssk.get_kernel_matrix()
Out[50]:
In [75]:
# Compare two short sentences for n = 1..6 (n is the kernel's third argument),
# printing each kernel matrix next to its square.
# `lamda_weight` is not set here; it must already exist from an earlier cell.
kant1 = 'science is organized knowledge'
kant2 = 'wisdom is organized life'
s = StringCharFeatures([kant1], RAWBYTE)
t = StringCharFeatures([kant2], RAWBYTE)
for n in range(1, 7):
    # The loop body has to be indented; left flush with `for` it raises IndentationError.
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    # Fetch the matrix once and reuse it for both printed values.
    kernel_matrix = ssk.get_kernel_matrix()
    # Formats as "<n> <matrix> <matrix**2>", the same text the Python 2
    # multi-item print statement writes, while also being valid Python 3.
    # `** 2` is presumably an element-wise square of a numpy array — TODO confirm.
    print("%s %s %s" % (n, kernel_matrix, kernel_matrix ** 2))
In [59]:
# Display the normalizer currently attached to `ssk`.
# Author's note: CSqrtDiagKernelNormalizer - divides the kernel by the square root of
# the product of the diagonal.
# NOTE(review): presumably k'(x, y) = k(x, y) / sqrt(k(x, x) * k(y, y)) — confirm
# against the Shogun documentation.
ssk.get_normalizer()
Out[59]:
In [71]:
# Arithmetic check: print the square root of a diagonal value of 0.5.
import math
diag = 0.5
# Single-argument call form: prints the same text as the Python 2 print
# statement and also parses under Python 3.
print(math.sqrt(diag))
In [74]:
# Arithmetic check in the other direction: squaring the normalisation value
# should give back roughly 0.5.
# 0.707106781187 is sqrt(0.5) rounded to 12 significant digits.
norm = 0.707106781187
# Single-argument call form: prints the same text as the Python 2 print
# statement and also parses under Python 3.
print(norm ** 2)
In [60]:
# Display get_kernel_diagonal() for `ssk`
# (presumably the diagonal entries of the kernel matrix — TODO confirm).
ssk.get_kernel_diagonal()
Out[60]:
In [61]:
# Switch `ssk` to the identity normalizer so kernel values are reported as computed.
# set_normalizer() expects a kernel-normalizer object rather than a name string,
# so build an IdentityKernelNormalizer instance instead of passing 'IDENTITY'.
# NOTE(review): confirm IdentityKernelNormalizer is exported by the installed
# modshogun build.
from modshogun import IdentityKernelNormalizer
ssk.set_normalizer(IdentityKernelNormalizer())
In [66]:
import modshogun
# from modshogun.KernelNormalizer import IdentityKernelNormalizer
import shogun.KernelNormalizer