In [3]:
# adapted from http://stackoverflow.com/a/21675241

import numpy

from shogun.Features import *
from shogun.Kernel import *
from shogun.Classifier import *
from shogun.Evaluation import *
from modshogun import StringCharFeatures, RAWBYTE
# from shogun.Kernel import SSKStringKernel
from shogun.Kernel import StringSubsequenceKernel


strings = ['cat', 'doom', 'car', 'boom']
test = ['bat', 'soon']

train_labels  = numpy.array([1, -1, 1, -1])
test_labels = numpy.array([1, -1])

features = StringCharFeatures(strings, RAWBYTE)
test_features = StringCharFeatures(test, RAWBYTE)

In [4]:
# 1 is n and 0.5 is lambda as described in Lodhi 2002
# sk = SSKStringKernel(features, features, 1, 0.5)
sk = StringSubsequenceKernel(features, features, 1, 0.5)

# Train the Support Vector Machine
labels = BinaryLabels(train_labels)
C = 1.0
svm = LibSVM(C, sk, labels)
svm.train()

# Prediction
predicted_labels = svm.apply(test_features).get_labels()
print predicted_labels


[ 1. -1.]

In [9]:
sk.get_kernel_matrix()


Out[9]:
array([[ 0.66666667,  0.        ],
       [ 0.        ,  0.66666667],
       [ 0.33333333,  0.        ],
       [ 0.23570226,  0.66666667]])

In [13]:
s = StringCharFeatures(['cat'], RAWBYTE)
t = StringCharFeatures(['bat'], RAWBYTE)
lamda_weight = 1
n = 1

ssk = StringSubsequenceKernel(s, t, n, lamda_weight)

In [55]:
s = StringCharFeatures(['cat'], RAWBYTE)
t = StringCharFeatures(['bat'], RAWBYTE)
for n in range(5):
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    print n, ssk.get_kernel_matrix()


0 [[ 0.]]
1 [[ 0.66666667]]
2 [[ 0.5]]
3 [[ 0.42857143]]
4 [[ 0.42857143]]

In [54]:
s = StringCharFeatures(['cat'], RAWBYTE)
for n in range(5):
    ssk = StringSubsequenceKernel(s, s, n, lamda_weight)
    print n, ssk.get_kernel_matrix()


0 [[ 0.]]
1 [[ 1.]]
2 [[ 1.]]
3 [[ 1.]]
4 [[ 1.]]

In [36]:
ssk.kernel(0,0) # idx_a, idx_b


Out[36]:
0.6666666666666667

In [39]:
ssk.get_lhs()


Out[39]:
StringFeatures

In [41]:
ssk.get_feature_class()


Out[41]:
30

In [43]:
ssk.get_feature_type()


Out[43]:
10

In [47]:
ssk.get_kernel_col(0)


Out[47]:
array([ 0.66666667])

In [49]:
ssk.get_kernel_row(0)


Out[49]:
array([ 0.66666667])

In [50]:
ssk.get_kernel_matrix()


Out[50]:
array([[ 0.66666667]])

In [75]:
kant1 = 'science is organized knowledge'
kant2 = 'wisdom is organized life'

s = StringCharFeatures([kant1], RAWBYTE)
t = StringCharFeatures([kant2], RAWBYTE)
for n in range(1, 7):
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    print n, ssk.get_kernel_matrix(), ssk.get_kernel_matrix()**2


1 [[ 0.84867922]] [[ 0.72025641]]
2 [[ 0.62349701]] [[ 0.38874853]]
3 [[ 0.42388399]] [[ 0.17967763]]
4 [[ 0.27714312]] [[ 0.07680831]]
5 [[ 0.17785773]] [[ 0.03163337]]
6 [[ 0.1135452]] [[ 0.01289251]]

In [59]:
# CSqrtDiagKernelNormalizer - divide kernel by square root of product of diagonal 
ssk.get_normalizer()


Out[59]:
SqrtDiagKernelNormalizer

In [71]:
import math

diag = 0.5
print math.sqrt(diag)


0.707106781187

In [74]:
norm = 0.707106781187

print norm ** 2


0.500000000001

In [60]:
ssk.get_kernel_diagonal()


Out[60]:
array([ 0.1135452])

In [61]:
ssk.set_normalizer('IDENTITY')


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-61-40fd1c9108d0> in <module>()
----> 1 ssk.set_normalizer('IDENTITY')

TypeError: in method 'Kernel_set_normalizer', argument 2 of type 'shogun::CKernelNormalizer *'

In [66]:
import modshogun

# from modshogun.KernelNormalizer import IdentityKernelNormalizer
import shogun.KernelNormalizer


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-66-d63679f195db> in <module>()
      2 
      3 # from modshogun.KernelNormalizer import IdentityKernelNormalizer
----> 4 import shogun.KernelNormalizer

ImportError: No module named KernelNormalizer