CG_BWT_SimpleBuild

``````

In [1]:

def rotations(t):
# Return list of rotations of input string t
tt = t * 2
return [ tt[i:i+len(t)] for i in range(len(t)) ]

``````
``````

In [2]:

rotations('cat')

``````
``````

Out[2]:

['cat', 'atc', 'tca']

``````
``````

In [3]:

def bwm(t):
# Return lexicographically sorted list of t’s rotations
return sorted(rotations(t))

``````
``````

In [4]:

bwm('abaaba\$')

``````
``````

Out[4]:

['\$abaaba', 'a\$abaab', 'aaba\$ab', 'aba\$aba', 'abaaba\$', 'ba\$abaa', 'baaba\$a']

``````
``````

In [5]:

print('\n'.join(bwm('abaaba\$')))

``````
``````

\$abaaba
a\$abaab
aaba\$ab
aba\$aba
abaaba\$
ba\$abaa
baaba\$a

``````
``````

In [6]:

def bwtViaBwm(t):
# Given T, returns BWT(T) by way of the BWM
return ''.join(map(lambda x: x[-1], bwm(t)))

``````
``````

In [7]:

bwtViaBwm('abaaba\$') # we can see the result equals the last column of the matrix above

``````
``````

Out[7]:

'abba\$aa'

``````
``````

In [8]:

def suffixArray(s):
satups = sorted([(s[i:], i) for i in range(len(s))])
return map(lambda x: x[1], satups)

def bwtViaSa(t):
# Given T, returns BWT(T) by way of the suffix array
bw = []
for si in suffixArray(t):
if si == 0:
bw.append('\$')
else:
bw.append(t[si-1])
return ''.join(bw) # return string-ized version of list bw

``````
``````

In [9]:

bwtViaBwm('abaaba\$'), bwtViaSa('abaaba\$') # same result

``````
``````

Out[9]:

('abba\$aa', 'abba\$aa')

``````