notebook.community

Edit and run



In [3]:

    
# adapted from http://stackoverflow.com/a/21675241

import numpy

from shogun.Features import *
from shogun.Kernel import *
from shogun.Classifier import *
from shogun.Evaluation import *
from modshogun import StringCharFeatures, RAWBYTE
# from shogun.Kernel import SSKStringKernel
from shogun.Kernel import StringSubsequenceKernel


strings = ['cat', 'doom', 'car', 'boom']
test = ['bat', 'soon']

train_labels  = numpy.array([1, -1, 1, -1])
test_labels = numpy.array([1, -1])

features = StringCharFeatures(strings, RAWBYTE)
test_features = StringCharFeatures(test, RAWBYTE)



In [4]:

    
# 1 is n and 0.5 is lambda as described in Lodhi 2002
# sk = SSKStringKernel(features, features, 1, 0.5)
sk = StringSubsequenceKernel(features, features, 1, 0.5)

# Train the Support Vector Machine
labels = BinaryLabels(train_labels)
C = 1.0
svm = LibSVM(C, sk, labels)
svm.train()

# Prediction
predicted_labels = svm.apply(test_features).get_labels()
print predicted_labels









    



[ 1. -1.]



In [9]:

    
sk.get_kernel_matrix()









    Out[9]:





array([[ 0.66666667,  0.        ],
       [ 0.        ,  0.66666667],
       [ 0.33333333,  0.        ],
       [ 0.23570226,  0.66666667]])



In [13]:

    
s = StringCharFeatures(['cat'], RAWBYTE)
t = StringCharFeatures(['bat'], RAWBYTE)
lamda_weight = 1
n = 1

ssk = StringSubsequenceKernel(s, t, n, lamda_weight)



In [55]:

    
s = StringCharFeatures(['cat'], RAWBYTE)
t = StringCharFeatures(['bat'], RAWBYTE)
for n in range(5):
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    print n, ssk.get_kernel_matrix()









    



0 [[ 0.]]
1 [[ 0.66666667]]
2 [[ 0.5]]
3 [[ 0.42857143]]
4 [[ 0.42857143]]



In [54]:

    
s = StringCharFeatures(['cat'], RAWBYTE)
for n in range(5):
    ssk = StringSubsequenceKernel(s, s, n, lamda_weight)
    print n, ssk.get_kernel_matrix()









    



0 [[ 0.]]
1 [[ 1.]]
2 [[ 1.]]
3 [[ 1.]]
4 [[ 1.]]



In [36]:

    
ssk.kernel(0,0) # idx_a, idx_b









    Out[36]:





0.6666666666666667



In [39]:

    
ssk.get_lhs()









    Out[39]:





StringFeatures



In [41]:

    
ssk.get_feature_class()









    Out[41]:





30



In [43]:

    
ssk.get_feature_type()









    Out[43]:





10



In [47]:

    
ssk.get_kernel_col(0)









    Out[47]:





array([ 0.66666667])



In [49]:

    
ssk.get_kernel_row(0)









    Out[49]:





array([ 0.66666667])



In [50]:

    
ssk.get_kernel_matrix()









    Out[50]:





array([[ 0.66666667]])



In [75]:

    
kant1 = 'science is organized knowledge'
kant2 = 'wisdom is organized life'

s = StringCharFeatures([kant1], RAWBYTE)
t = StringCharFeatures([kant2], RAWBYTE)
for n in range(1, 7):
    ssk = StringSubsequenceKernel(s, t, n, lamda_weight)
    print n, ssk.get_kernel_matrix(), ssk.get_kernel_matrix()**2









    



1 [[ 0.84867922]] [[ 0.72025641]]
2 [[ 0.62349701]] [[ 0.38874853]]
3 [[ 0.42388399]] [[ 0.17967763]]
4 [[ 0.27714312]] [[ 0.07680831]]
5 [[ 0.17785773]] [[ 0.03163337]]
6 [[ 0.1135452]] [[ 0.01289251]]



In [59]:

    
# CSqrtDiagKernelNormalizer - divide kernel by square root of product of diagonal 
ssk.get_normalizer()









    Out[59]:





SqrtDiagKernelNormalizer



In [71]:

    
import math

diag = 0.5
print math.sqrt(diag)









    



0.707106781187



In [74]:

    
norm = 0.707106781187

print norm ** 2









    



0.500000000001



In [60]:

    
ssk.get_kernel_diagonal()









    Out[60]:





array([ 0.1135452])



In [61]:

    
ssk.set_normalizer('IDENTITY')









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-61-40fd1c9108d0> in <module>()
----> 1 ssk.set_normalizer('IDENTITY')

TypeError: in method 'Kernel_set_normalizer', argument 2 of type 'shogun::CKernelNormalizer *'



In [66]:

    
import modshogun

# from modshogun.KernelNormalizer import IdentityKernelNormalizer
import shogun.KernelNormalizer









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-66-d63679f195db> in <module>()
      2 
      3 # from modshogun.KernelNormalizer import IdentityKernelNormalizer
----> 4 import shogun.KernelNormalizer

ImportError: No module named KernelNormalizer