In [1]:
import time
print('Last updated: %s' %time.strftime('%d/%m/%Y'))
Python comes with a full-charged battery pack of useful Standard Library modules. The bisect
module (short for "Array bisection algorithm") was built for fast insertion of new values to already sorted lists.
In this benchmark, we will take a look at 2 of its useful functions:
bisect_left(val, a_list)
:
In [4]:
import bisect
a_list = [1,2,4]
bisect.bisect_left(a_list, 3)
Out[4]:
insort_left(val, a_list)
:
In [5]:
a_list = [1,2,4]
bisect.insort_left(a_list, 3)
a_list
Out[5]:
In [4]:
def insert_sort(a_list, val):
a_list.append(val)
a_list.sort()
return None
In [5]:
from bisect import bisect_left
def bisect_insert(a_list, val):
a_list.insert(bisect_left(a_list, val), val)
return None
In [6]:
from bisect import insort_left
def bisect_insort(a_list, val):
insort_left(a_list, val)
return None
In [7]:
import timeit
import random
random.seed(123)
a_list = [random.randrange(0, 1000) for i in range(50)]
a_list.sort()
b_list = a_list[:]
c_list = a_list[:]
insert_int = [] # for timeit results
random.seed(123)
insert_int.append(min(timeit.Timer('insert_sort(a_list, random.randrange(0, 1000))',
'from __main__ import insert_sort, a_list, random').repeat(repeat=3, number=10000)))
random.seed(123)
insert_int.append(min(timeit.Timer('bisect_insert(b_list, random.randrange(0, 1000))',
'from __main__ import bisect_insert, b_list, random').repeat(repeat=3, number=10000)))
random.seed(123)
insert_int.append(min(timeit.Timer('bisect_insort(c_list, random.randrange(0, 1000))',
'from __main__ import bisect_insort, c_list, random').repeat(repeat=3, number=10000)))
In [8]:
assert(a_list[:] == b_list[:] == c_list[:])
print('ok')
In [9]:
import string
def rand_string(length):
""" Generates a random string of numbers, lower- and uppercase chars. """
return ''.join(random.choice(
string.ascii_lowercase + string.ascii_uppercase + string.digits)
for i in range(length)
)
print("Example:", rand_string(length=8))
In [10]:
import timeit
import random
a_list = [rand_string(4) for i in range(50)]
a_list.sort()
b_list = a_list[:]
c_list = a_list[:]
insert_str = [] # for timeit results
random.seed(123)
insert_str.append(min(timeit.Timer('insert_sort(a_list, rand_string(4))',
'from __main__ import insert_sort, a_list, rand_string').repeat(repeat=3, number=10000)))
random.seed(123)
insert_str.append(min(timeit.Timer('bisect_insert(b_list, rand_string(4))',
'from __main__ import bisect_insert, b_list, rand_string').repeat(repeat=3, number=10000)))
random.seed(123)
insert_str.append(min(timeit.Timer('bisect_insort(c_list, rand_string(4))',
'from __main__ import bisect_insort, c_list, rand_string').repeat(repeat=3, number=10000)))
In [11]:
assert(a_list[:] == b_list[:] == c_list[:])
print('ok')
In [12]:
import platform
import multiprocessing
def print_sysinfo():
print('\nPython version :', platform.python_version())
print('compiler :', platform.python_compiler())
print('\nsystem :', platform.system())
print('release :', platform.release())
print('machine :', platform.machine())
print('processor :', platform.processor())
print('CPU count :', multiprocessing.cpu_count())
print('interpreter:', platform.architecture()[0])
print('\n\n')
In [13]:
%matplotlib inline
In [16]:
import matplotlib.pyplot as plt
def plot():
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
bar_labels = ['.insert() + .sort()',
'bisect_left() + .insert()',
'bisect_insort()'
]
bar_width = 0.5
bar_left = [i+1 for i in range(len(insert_int))]
tick_pos = [i+(bar_width/2) for i in bar_left]
plt.grid()
ax1.bar(bar_left, insert_int, width=bar_width,
alpha=0.5, color='b', label='integer list')
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
plt.setp(plt.gca().get_xticklabels(), rotation=45,
horizontalalignment='right')
plt.sca(ax1)
plt.xticks(tick_pos, bar_labels)
ax1.set_ylabel("time in milliseconds for 1000 iterations and 3 repititions")
ax1.set_xlabel("")
ax2.bar(bar_left, insert_str, width=bar_width,
alpha=0.5, color='g', label='string list')
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
plt.grid()
plt.setp(plt.gca().get_xticklabels(), rotation=45,
horizontalalignment='right')
plt.sca(ax2)
plt.xticks(tick_pos, bar_labels)
ax2.set_ylabel("time in milliseconds for 1000 iterations and 3 repititions")
ax2.set_xlabel("")
plt.ylim([0,2.2])
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles, labels)
handles, labels = ax2.get_legend_handles_labels()
ax2.legend(handles, labels)
plt.suptitle('Inserting new values into a sorted list', fontsize=18)
plt.show()
In [17]:
plot()
print_sysinfo()