# CG_DP_EditDist

``````

In [1]:

def hammingDistance(x, y):
''' Return Hamming distance between x and y '''
assert len(x) == len(y)
nmm = 0
for i in range(0, len(x)):
if x[i] != y[i]:
nmm += 1
return nmm

``````
``````

In [2]:

hammingDistance('brown', 'blown')

``````
``````

Out[2]:

1

``````
``````

In [3]:

hammingDistance('cringe', 'orange')

``````
``````

Out[3]:

2

``````
``````

In [4]:

def boundEditDistance(x, y):
''' Return loose lower and upper bounds on the edit distance
between x and y in O(|x| + |y|) time. '''
if x == y: return 0, 0
if len(x) == 0: return len(y), len(y)
if len(y) == 0: return len(x), len(x)
diff = abs(len(x) - len(y))
lower = diff
if lower == 0 and x != y:
lower = 1
minlen = min(len(x), len(y))
upper = hammingDistance(x[:minlen], y[:minlen]) + diff
return lower, upper

``````
``````

In [5]:

boundEditDistance('brown', 'blown')

``````
``````

Out[5]:

(1, 1)

``````
``````

In [6]:

boundEditDistance('create', 'creation')

``````
``````

Out[6]:

(2, 3)

``````
``````

In [7]:

boundEditDistance('aaa', 'bbb')

``````
``````

Out[7]:

(1, 3)

``````
``````

In [8]:

# note: bounds can be pretty rough
boundEditDistance('the longest', 'longest day')

``````
``````

Out[8]:

(1, 11)

``````
``````

In [9]:

boundEditDistance('Shakespeare', 'shake spear')

``````
``````

Out[9]:

(1, 7)

``````
``````

In [10]:

def edDistRecursive(x, y):
if len(x) == 0: return len(y)
if len(y) == 0: return len(x)
delt = 1 if x[-1] != y[-1] else 0
diag = edDistRecursive(x[:-1], y[:-1]) + delt
vert = edDistRecursive(x[:-1], y) + 1
horz = edDistRecursive(x, y[:-1]) + 1
return min(diag, vert, horz)

``````
``````

In [11]:

edDistRecursive('Shakespeare', 'shake spear') # this takes a while!

``````
``````

Out[11]:

3

``````
``````

In [12]:

# let's see how long it takes
from time import time
st = time()
edDistRecursive('Shakespeare', 'shake spear')
print('Took %0.3f seconds' % (time() - st))

``````
``````

Took 25.144 seconds

``````
``````

In [13]:

def edDistRecursiveMemo(x, y, memo=None):
''' A version of edDistRecursive with memoization.  For each x, y we see, we
record result from edDistRecursiveMemo(x, y).  In the future, we retrieve
recorded result rather than re-run the function. '''
if memo is None: memo = {}
if len(x) == 0: return len(y)
if len(y) == 0: return len(x)
if (len(x), len(y)) in memo:
return memo[(len(x), len(y))]
delt = 1 if x[-1] != y[-1] else 0
diag = edDistRecursiveMemo(x[:-1], y[:-1], memo) + delt
vert = edDistRecursiveMemo(x[:-1], y, memo) + 1
horz = edDistRecursiveMemo(x, y[:-1], memo) + 1
ans = min(diag, vert, horz)
memo[(len(x), len(y))] = ans
return ans

``````
``````

In [14]:

edDistRecursiveMemo('Shakespeare', 'shake spear')

``````
``````

Out[14]:

3

``````
``````

In [15]:

# this time it's instantaneous
from time import time
st = time()
edDistRecursiveMemo('Shakespeare', 'shake spear')
print('Took %0.3f seconds' % (time() - st))

``````
``````

Took 0.000 seconds

``````
``````

In [16]:

from numpy import zeros

def edDistDp(x, y):
""" Calculate edit distance between sequences x and y using
matrix dynamic programming.  Return distance. """
D = zeros((len(x)+1, len(y)+1), dtype=int)
D[0, 1:] = range(1, len(y)+1)
D[1:, 0] = range(1, len(x)+1)
for i in range(1, len(x)+1):
for j in range(1, len(y)+1):
delt = 1 if x[i-1] != y[j-1] else 0
D[i, j] = min(D[i-1, j-1]+delt, D[i-1, j]+1, D[i, j-1]+1)
return D[len(x), len(y)]

``````
``````

In [17]:

edDistDp('Shakespeare', 'shake spear')

``````
``````

Out[17]:

3

``````
``````

In [18]:

# again, instantaneous
from time import time
st = time()
edDistDp('Shakespeare', 'shake spear')
print('Took %0.3f seconds' % (time() - st))

``````
``````

Took 0.000 seconds

``````
``````

In [19]:

edDistDp('GCGTATGCACGC', 'GCTATGCCACGC')

``````
``````

Out[19]:

2

``````