CG_BWT_SimpleBuild



In [1]:
def rotations(t):
    # Return list of rotations of input string t
    tt = t * 2
    return [ tt[i:i+len(t)] for i in range(len(t)) ]

In [2]:
rotations('cat')


Out[2]:
['cat', 'atc', 'tca']

In [3]:
def bwm(t):
    # Return lexicographically sorted list of t’s rotations
    return sorted(rotations(t))

In [4]:
bwm('abaaba$')


Out[4]:
['$abaaba', 'a$abaab', 'aaba$ab', 'aba$aba', 'abaaba$', 'ba$abaa', 'baaba$a']

In [5]:
print('\n'.join(bwm('abaaba$')))


$abaaba
a$abaab
aaba$ab
aba$aba
abaaba$
ba$abaa
baaba$a

In [6]:
def bwtViaBwm(t):
    # Given T, returns BWT(T) by way of the BWM
    return ''.join(map(lambda x: x[-1], bwm(t)))

In [7]:
bwtViaBwm('abaaba$') # we can see the result equals the last column of the matrix above


Out[7]:
'abba$aa'

In [8]:
def suffixArray(s):
    satups = sorted([(s[i:], i) for i in range(len(s))])
    return map(lambda x: x[1], satups)

def bwtViaSa(t):
    # Given T, returns BWT(T) by way of the suffix array
    bw = []
    for si in suffixArray(t):
        if si == 0:
            bw.append('$')
        else:
            bw.append(t[si-1])
    return ''.join(bw) # return string-ized version of list bw

In [9]:
bwtViaBwm('abaaba$'), bwtViaSa('abaaba$') # same result


Out[9]:
('abba$aa', 'abba$aa')