In [1]:
# Not a great way to build a suffix array, but we'll use it
# for the small examples here
def naiveBuildSA(t):
satups = sorted([(t[i:], i) for i in range(len(t))])
return list(map(lambda x: x[1], satups))
In [2]:
# Simple function calculating LCP of two string
def lcp(x, y):
for i in range(min(len(x), len(y))):
if x[i] != y[i]: return i
return min(len(x), len(y))
In [3]:
lcp('start', 'stark')
Out[3]:
In [4]:
lcp('start', 'star')
Out[4]:
In [5]:
lcp('yes', 'no')
Out[5]:
In [6]:
# Naive way to calculate LCP1 array given string and its suffix array
def naiveLCP1(t, sa):
return [ lcp(t[sa[i]:], t[sa[i+1]:]) for i in range(len(sa)-1) ]
In [7]:
t = 'abaaba$'
naiveLCP1(t, naiveBuildSA(t))
Out[7]:
In [8]:
t = 'abracadabracada$'
naiveLCP1(t, naiveBuildSA(t))
Out[8]:
In [9]:
naiveBuildSA(t)
Out[9]:
In [10]:
# Calculates (l, c) LCPs and (c, r) LCPs from LCP1 array. Returns pair where
# first element is list of LCPs for (l, c) combos and second is LCPs for
# (c, r) combos.
def precomputeLcps(lcp1):
llcp, rlcp = [None] * len(lcp1), [None] * len(lcp1)
lcp1 += [0]
def precomputeLcpsHelper(l, r):
if l == r-1: return lcp1[l]
c = (l + r) // 2
llcp[c-1] = precomputeLcpsHelper(l, c)
rlcp[c-1] = precomputeLcpsHelper(c, r)
return min(llcp[c-1], rlcp[c-1])
precomputeLcpsHelper(0, len(lcp1))
return llcp, rlcp
In [11]:
precomputeLcps(naiveLCP1(t, naiveBuildSA(t)))
Out[11]: