In [1]:
def traceLcs(D, x, y):
''' Backtrace for LCS; returns LCS as string. '''
i, j = len(x), len(y) # start in lower-right
st = []
while i > 0 and j > 0:
# get the three contributions
diag, vert, horz = 0, 0, 0
if i > 0 and j > 0:
delt = -1 if x[i-1] == y[j-1] else 1
diag = D[i-1, j-1] + delt
if i > 0: vert = D[i-1, j]
if j > 0: horz = D[i, j-1]
if diag <= vert and diag <= horz:
# diagonal is best, thus, this char is part of LCS
st.append(x[i-1])
i -= 1; j -= 1 # move up and left
elif vert <= horz: i -= 1 # vertical is best; move up
else: j -= 1 # horizontal is best; move left
# reverse it, then return string-ized LCS
return (''.join(st))[::-1]
import numpy
def lcsDp(x, y):
''' Return LCS of x and y. Uses bottom-up dynamic programming
approach. '''
D = numpy.zeros((len(x)+1, len(y)+1), dtype=int)
for i in range(1, len(x)+1):
for j in range(1, len(y)+1):
delt = -1 if x[i-1] == y[j-1] else 1
D[i, j] = min(D[i-1, j-1] + delt, D[i-1, j], D[i, j-1])
return traceLcs(D, x, y), D
In [2]:
lcs, D = lcsDp('ATCTGAT', 'TGCATA') # example from Jones & Pevzner 6.5
lcs
Out[2]:
In [3]:
D
Out[3]: