# Definition(s)

Z algorithm is used for computing Zi(P) = the length of the longest substring of P that starts at i > 1 and matches a prefix of P.

# Algorithm(s)

``````

In [1]:

def z_algorithm(string):
# z[0] = 0 (by convention)
# z[i] = the longest prefix of string[0:n) and string[i:n) for 1 <= i < n
# Complexity: O(n)

l, r, n = 0, 0, len(string)
z = [0 for i in range(n)]

for i in range(1, n):
if i <= r:
z[i] = min(r - i + 1, z[i - l])

while i + z[i] < n and string[z[i]] == string[i + z[i]]:
z[i] += 1

if i + z[i] - 1 > r:
l = i
r = i + z[i] - 1

return z

``````
``````

In [2]:

def string_matching(pattern, text):
p, t = len(pattern), len(text)
z = z_algorithm(pattern + '#' + text)

matches = []
for i in range(p + 1, p + t + 1):
if z[i] == p:
matches.append(i - p - 1)

return matches

``````

# Run(s)

``````

In [3]:

print(string_matching('ABA', 'CABBCABABAB'))

``````
``````

[5, 7]

``````
``````

In [4]:

print(z_algorithm('abababababababababbabababa'))

``````
``````

[0, 0, 16, 0, 14, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 7, 0, 5, 0, 3, 0, 1]

``````
``````

In [6]:

print(z_algorithm('ababa'))

``````
``````

[0, 0, 3, 0, 1]

``````
``````

In [14]:

with open('raven.txt', 'r') as file:

``````
``````

In [15]:

print(string_matching('the', raven))

``````
``````

[160, 371, 444, 472, 562, 584, 617, 676, 821, 1060, 1140, 1317, 1338, 1410, 1504, 1534, 1566, 1580, 1597, 1671, 1732, 1919, 1930, 2033, 2080, 2133, 2166, 2196, 2481, 2512, 2658, 2713, 2749, 3036, 3116, 3145, 3248, 3253, 3280, 3285, 3342, 3378, 3444, 3485, 3727, 3806, 3945, 4213, 4336, 4369, 4434, 4514, 4612, 4668, 4678, 4703, 4735, 4799, 4840, 4940, 4965, 4985, 5126, 5137, 5198, 5402, 5459, 5523, 5555, 5660, 5675, 5691, 5823, 5923, 5952, 6017, 6093, 6140, 6195, 6261]

``````
``````

In [16]:

print(string_matching('our', raven))

``````
``````

[1109, 2822, 3232, 5594]

``````