# CG_BinarySearchSA

``````

In [1]:

# Not a great way to build a suffix array, but we'll use it
# for the small examples here
def naiveBuildSA(t):
    """Return the suffix array of ``t`` as a list of suffix start offsets.

    The offsets are ordered so that the suffixes ``t[i:]`` they denote are in
    ascending order.  Each suffix is materialised as a slice to act as its
    own sort key, so this is only sensible for short inputs.
    """
    # All suffixes of one string have different lengths, so the keys never
    # tie and the order equals that of sorting explicit (suffix, offset) pairs.
    return sorted(range(len(t)), key=lambda start: t[start:])

``````
``````

In [2]:

naiveBuildSA('abaaba$')  # works on a simple example

``````
``````

Out[2]:

[6, 5, 2, 3, 0, 4, 1]

``````
``````

In [3]:

def binarySearchSA(t, sa, p):
    """Bisect suffix array ``sa`` of text ``t`` for the pattern ``p``.

    ``t`` must end with the terminator ``'$'`` and ``sa`` must contain one
    offset per character of ``t``.  The search never inspects ``sa[0]``; it
    takes for granted that the suffix there sorts before ``p``.

    Returns the smallest index ``k`` in ``1..len(sa)`` such that ``p`` is
    less than or equal to the suffix ``t[sa[k]:]`` (a pattern that is a
    prefix of the suffix counts as less-or-equal), or ``len(sa)`` when ``p``
    is greater than every suffix.

    Raises ``AssertionError`` when ``t`` lacks the terminator or the lengths
    of ``t`` and ``sa`` differ.
    """
    assert t[-1] == '$'  # t already has terminator
    assert len(t) == len(sa)  # sa is the suffix array for t
    if len(t) == 1:
        # Only the terminator suffix exists; the loop below needs hi - lo >= 2.
        return 1
    # invariant: t[sa[lo]:] < p <= t[sa[hi]:], where position len(sa) stands
    # for an imaginary suffix greater than any pattern
    lo, hi = 0, len(sa)
    while True:
        mid = (lo + hi) // 2
        start = sa[mid]
        # determine whether p <= t[start:] by comparing characters
        # starting from the left-hand sides of p and t[start:]
        p_le_suffix = True  # assume p <= t[start:] until proven otherwise
        i = 0
        while i < len(p) and start + i < len(t):
            if p[i] < t[start + i]:
                break  # p < t[start:]
            elif p[i] > t[start + i]:
                p_le_suffix = False
                break  # p > t[start:]
            i += 1  # tied so far
        if i < len(p) and start + i >= len(t):
            # The suffix ran out first, so it is a proper prefix of p and
            # therefore sorts before p.
            p_le_suffix = False
        if p_le_suffix:
            if mid == lo + 1:
                return mid
            hi = mid
        else:
            if mid == hi - 1:
                return hi
            lo = mid

``````
``````

In [4]:

t = 'abaaba$'  # text with its terminator already appended
sa = naiveBuildSA(t)  # suffix array reused by the cells that follow
binarySearchSA(t, sa, 'aba')

``````
``````

Out[4]:

3

``````
``````

In [5]:

binarySearchSA(t, sa, 'bb') # p is greater than all suffixes, so the result is len(sa)

``````
``````

Out[5]:

7

``````
``````

In [6]:

binarySearchSA(t, sa, 'aa') # p is a prefix of a suffix; the result is that suffix's position in sa

``````
``````

Out[6]:

2

``````
``````

In [7]:

def suffixLcp(t, toff, p):
    """Return how many leading characters ``p`` shares with ``t[toff:]``.

    ``toff`` is expected to be a non-negative offset into ``t``.  The count
    stops at the first differing character or when either string runs out,
    so an offset at or past the end of ``t`` yields 0.
    """
    # The comparison can go no further than the shorter of p and the suffix.
    limit = min(len(p), len(t) - toff)
    matched = 0
    while matched < limit and p[matched] == t[matched + toff]:
        matched += 1
    return matched

``````
``````

In [8]:

suffixLcp('abaaba$', 0, 'aba')  # the whole pattern matches

``````
``````

Out[8]:

3

``````
``````

In [9]:

suffixLcp('abaaba$', 0, 'abab')  # mismatch at the pattern's last character

``````
``````

Out[9]:

3

``````
``````

In [10]:

suffixLcp('abaaba$', 0, 'abaabaaba')  # match stops at the terminator

``````
``````

Out[10]:

6

``````
``````

In [11]:

def binarySearchSA_lcp1(t, sa, p):
    """Bisect suffix array ``sa`` of ``t`` for ``p``, skipping known matches.

    Same contract as a plain bisection over the suffix array: ``t`` must end
    with the terminator ``'$'``, ``sa`` must contain one offset per character
    of ``t``, and the suffix at ``sa[0]`` is taken to sort before ``p``.  The
    difference is that each probe resumes the character comparison after the
    prefix already known to match instead of restarting at position 0.

    Returns the smallest index ``k`` in ``1..len(sa)`` such that ``p`` is
    less than or equal to the suffix ``t[sa[k]:]`` (a pattern that is a
    prefix of the suffix counts as less-or-equal), or ``len(sa)`` when ``p``
    is greater than every suffix.

    Raises ``AssertionError`` when ``t`` lacks the terminator or the lengths
    of ``t`` and ``sa`` differ.
    """
    assert t[-1] == '$'  # t already has terminator
    assert len(t) == len(sa)  # sa is the suffix array for t
    if len(t) == 1:
        # Only the terminator suffix exists; the loop below needs hi - lo >= 2.
        return 1
    # invariant: t[sa[lo]:] < p <= t[sa[hi]:], where position len(sa) stands
    # for an imaginary suffix greater than any pattern
    lo, hi = 0, len(sa)
    # Number of leading characters p is known to share with the suffixes at
    # the current left and right boundaries.
    lcp_lo, lcp_hi = 0, 0
    while True:
        mid = (lo + hi) // 2
        start = sa[mid]
        # determine whether p <= t[start:] by comparing characters
        p_le_suffix = True  # assume p <= t[start:] until proven otherwise
        # Suffixes are sorted, so everything between the two boundaries
        # shares at least the smaller boundary match with p; skip that part.
        i = min(lcp_lo, lcp_hi)
        while i < len(p) and start + i < len(t):
            if p[i] < t[start + i]:
                break  # p < t[start:]
            elif p[i] > t[start + i]:
                p_le_suffix = False
                break  # p > t[start:]
            i += 1  # tied so far
        if i < len(p) and start + i >= len(t):
            # The suffix ran out first, so it is a proper prefix of p and
            # therefore sorts before p.
            p_le_suffix = False
        if p_le_suffix:
            if mid == lo + 1:
                return mid
            hi = mid
            lcp_hi = i
        else:
            if mid == hi - 1:
                return hi
            lo = mid
            lcp_lo = i

``````
``````

In [12]:

binarySearchSA_lcp1(t, sa, 'aba') # whole pattern matches a suffix

``````
``````

Out[12]:

3

``````
``````

In [13]:

binarySearchSA_lcp1(t, sa, 'bb') # p is greater than all suffixes, so the result is len(sa)

``````
``````

Out[13]:

7

``````
``````

In [14]:

binarySearchSA_lcp1(t, sa, 'aa') # p is a prefix of a suffix; the result is that suffix's position in sa

``````
``````

Out[14]:

2

``````