This benchmark was inspired by the article "A Nice Little Bit of Python" for finding the most elegant way to determine if a string ends with a particular substring.
There is the "old-fashioned" way using a sequence of "or
"s, e.g.,
if needle.endswith('ly') or needle.endswith('ed') or\
needle.endswith('ing') or needle.endswith('ers'):
print('Is valid')
else:
print('Invalid')
Or the more elegant way using a list comprehension that was suggested by the author:
if any([needle.endswith(e) for e in ('ly', 'ed', 'ing', 'ers')]):
print('Is valid')
else:
print('Invalid')
However, there are two even better solutions. One would be replacing the list comprehension by a generator, and my favorite one: feeding a tuple to the .endswith()
method. See the different implementations below:
In [1]:
from re import search as re_search
from re import compile as re_compile
def old_fashioned(needle):
return bool(needle.endswith('ly') or needle.endswith('ed') or\
needle.endswith('ing') or needle.endswith('ers'))
def list_comprehension(needle):
return bool(any([needle.endswith(e) for e in ('ly', 'ed', 'ing', 'ers')]))
def generator(needle):
return bool(any(needle.endswith(e) for e in ('ly', 'ed', 'ing', 'ers')))
def endswith_tuple(needle):
return bool(needle.endswith(('ly', 'ed', 'ing', 'ers')))
def regexpr(needle):
return bool(re_search(r'(?:ly|ed|ing|ers)$', needle))
comp = re_compile(r'(?:ly|ed|ing|ers)$')
def compiled_regexpr(needle):
return bool(comp.search(needle))
def map_func(needle):
return any(map(needle.endswith, ('ly', 'ed', 'ing', 'ers')))
Quick note on why I am importing re.search
as re_search
:
To decrease the overhead for the lookup.
In [2]:
import re
%timeit re_search(r'(?:ly|ed|ing|ers)$', 'needlers')
%timeit re.search(r'(?:ly|ed|ing|ers)$', 'needlers')
In [3]:
funcs = [old_fashioned, list_comprehension,
generator, endswith_tuple,
regexpr, compiled_regexpr,
map_func
]
In [4]:
for f in funcs:
assert(f('neeeeedly') == True), f.__name__
assert(f('neeeeedlee') == False), f.__name__
assert(f('nelyeedlee') == False), f.__name__
print('All functions work correctly.')
In [5]:
import timeit
test_cases = ["neeeeedly", "neeeeedlers", "neeeeedlee", ]
times_n = {f.__name__:[] for f in funcs}
for t in test_cases:
for f in funcs:
f = f.__name__
times_n[f].append(min(timeit.Timer('%s(t)' %f,
'from __main__ import %s, t' %f)
.repeat(repeat=500, number=1000)))
In [6]:
import platform
import multiprocessing
def print_sysinfo():
print('\nPython version :', platform.python_version())
print('compiler :', platform.python_compiler())
print('\nsystem :', platform.system())
print('release :', platform.release())
print('machine :', platform.machine())
print('processor :', platform.processor())
print('CPU count :', multiprocessing.cpu_count())
print('interpreter :', platform.architecture()[0])
print('\n\n')
In [7]:
%matplotlib inline
In [10]:
from numpy import arange
import matplotlib.pyplot as plt
def plot():
labels = [('old_fashioned','needle.endswith("ly") or needle.endswith("ed") or ...'),
('list_comprehension', 'any([needle.endswith(e) for e in ("ly", "ed", ...)])'),
('generator', 'any(needle.endswith(e) for e in ("ly", "ed", ...))'),
('regexpr', 're.search(r"(?:ly|ed|ing|ers)$", needle)'),
('compiled_regexpr', 'same as above, but with compiled regexpr'),
('map_func', 'any(map(needle.endswith, ("ly", "ed", "ing", "ers")))'),
('endswith_tuple', 'needle.endswith(("ly", "ed", "ing", "ers"))'),
]
x_labels = ['True (first match): "neeeeedly"',
'True (first match): "neeeeedlers"',
'False: "neeeeedlee"'
]
ind = arange(len(test_cases)) # the x locations for the groups
width = 0.12
fig = plt.figure(figsize=(14,8))
ax = fig.add_subplot(111)
colors = [(0,'c'), (1,'b'), (2,'g'), (3,'r'), (4,'y'), (5,'m'), (6, 'k')]
for l,c in zip(labels,colors):
ax.bar(ind + c[0]*width,
times_n[l[0]],
width,
alpha=0.5,
color=c[1],
label=l[1]
)
ax.set_ylabel('execution time in microseconds', fontsize=16)
ax.set_title('Methods for determening if a string '\
'ends with a particular substring',
fontsize=18)
ax.set_xticks(ind + width + 0.24)
ax.set_xticklabels(x_labels)
leg = ax.legend(loc='upper left', bbox_to_anchor = (0.85, 1.0))
plt.grid()
plt.ylim([0, 0.003])
plt.xlim([-0.2, 3.0])
plt.show()
In [11]:
print_sysinfo()
plot()
Clearly, the most elegant and Pythonic way of determining if a string ends with a particular substring is also the most efficient one!