In [1]:
import time
print('Last updated: %s' %time.strftime('%d/%m/%Y'))


Last updated: 24/06/2014


I would be happy to hear your comments and suggestions.
Please feel free to drop me a note via twitter, email, or google+.


Day 12 - One Python Benchmark per Day

Lightning fast insertion into sorted lists via the bisect module



Python comes with a full-charged battery pack of useful Standard Library modules. The bisect module (short for "Array bisection algorithm") was built for fast insertion of new values to already sorted lists.

In this benchmark, we will take a look at 2 of its useful functions:


  • bisect_left(val, a_list):
    Returns the insertion point for a value into a sorted list. E.g.,

In [4]:
import bisect
a_list = [1,2,4]
bisect.bisect_left(a_list, 3)


Out[4]:
2


  • insort_left(val, a_list):
    Directly inserts a new value into a sorted list maintaining the order.

In [5]:
a_list = [1,2,4]
bisect.insort_left(a_list, 3)
a_list


Out[5]:
[1, 2, 3, 4]



Approaches for inserting a value into a sorted list


In [4]:
def insert_sort(a_list, val):
    a_list.append(val)
    a_list.sort()
    return None

In [5]:
from bisect import bisect_left

def bisect_insert(a_list, val):
    a_list.insert(bisect_left(a_list, val), val)
    return None

In [6]:
from bisect import insort_left

def bisect_insort(a_list, val):
    insort_left(a_list, val)
    return None



timeit benchmark for integer lists


In [7]:
import timeit
import random

random.seed(123)
a_list = [random.randrange(0, 1000) for i in range(50)]
a_list.sort()
b_list = a_list[:]
c_list = a_list[:]

insert_int = [] # for timeit results

random.seed(123)
insert_int.append(min(timeit.Timer('insert_sort(a_list, random.randrange(0, 1000))', 
            'from __main__ import insert_sort, a_list, random').repeat(repeat=3, number=10000)))

random.seed(123)
insert_int.append(min(timeit.Timer('bisect_insert(b_list, random.randrange(0, 1000))', 
            'from __main__ import bisect_insert, b_list, random').repeat(repeat=3, number=10000)))

random.seed(123)
insert_int.append(min(timeit.Timer('bisect_insort(c_list, random.randrange(0, 1000))', 
            'from __main__ import bisect_insort, c_list, random').repeat(repeat=3, number=10000)))

Checking that all implementations produced similar results


In [8]:
assert(a_list[:] == b_list[:] == c_list[:])
print('ok')


ok



timeit benchmark for "string" lists

Random String Generator


In [9]:
import string

def rand_string(length):
    """ Generates a random string of numbers, lower- and uppercase chars. """
    return ''.join(random.choice(
            string.ascii_lowercase + string.ascii_uppercase + string.digits) 
                   for i in range(length)
            )

print("Example:", rand_string(length=8))


Example: WEbg6EQl

In [10]:
import timeit
import random

a_list = [rand_string(4) for i in range(50)]
a_list.sort()
b_list = a_list[:]
c_list = a_list[:]

insert_str = [] # for timeit results

random.seed(123)
insert_str.append(min(timeit.Timer('insert_sort(a_list, rand_string(4))', 
            'from __main__ import insert_sort, a_list, rand_string').repeat(repeat=3, number=10000)))

random.seed(123)
insert_str.append(min(timeit.Timer('bisect_insert(b_list, rand_string(4))', 
            'from __main__ import bisect_insert, b_list, rand_string').repeat(repeat=3, number=10000)))

random.seed(123)
insert_str.append(min(timeit.Timer('bisect_insort(c_list, rand_string(4))', 
            'from __main__ import bisect_insort, c_list, rand_string').repeat(repeat=3, number=10000)))

Check that all implementations produced similar results


In [11]:
assert(a_list[:] == b_list[:] == c_list[:])
print('ok')


ok



Preparing to plot the results


In [12]:
import platform
import multiprocessing

def print_sysinfo():
    
    print('\nPython version  :', platform.python_version())
    print('compiler        :', platform.python_compiler())
    
    print('\nsystem     :', platform.system())
    print('release    :', platform.release())
    print('machine    :', platform.machine())
    print('processor  :', platform.processor())
    print('CPU count  :', multiprocessing.cpu_count())
    print('interpreter:', platform.architecture()[0])
    print('\n\n')

In [13]:
%matplotlib inline

In [16]:
import matplotlib.pyplot as plt

def plot():

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))

    
    bar_labels = ['.insert() + .sort()', 
                  'bisect_left() + .insert()', 
                  'bisect_insort()'
                  ]
    bar_width = 0.5
    bar_left = [i+1 for i in range(len(insert_int))] 
    tick_pos = [i+(bar_width/2) for i in bar_left] 
    plt.grid()
    
    ax1.bar(bar_left, insert_int, width=bar_width,
            alpha=0.5, color='b', label='integer list')

    
    plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
    
    plt.setp(plt.gca().get_xticklabels(), rotation=45, 
             horizontalalignment='right')
    
    plt.sca(ax1)
    plt.xticks(tick_pos, bar_labels)

    ax1.set_ylabel("time in milliseconds for 1000 iterations and 3 repititions")
    ax1.set_xlabel("")
    
    
    ax2.bar(bar_left, insert_str, width=bar_width,
            alpha=0.5, color='g', label='string list')
    
    plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
    plt.grid()
    plt.setp(plt.gca().get_xticklabels(), rotation=45, 
             horizontalalignment='right')

    plt.sca(ax2)
    plt.xticks(tick_pos, bar_labels)

    ax2.set_ylabel("time in milliseconds for 1000 iterations and 3 repititions")
    ax2.set_xlabel("")
    plt.ylim([0,2.2])
    
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels)
    handles, labels = ax2.get_legend_handles_labels()
    ax2.legend(handles, labels)
    
    plt.suptitle('Inserting new values into a sorted list', fontsize=18)
    plt.show()



Results


In [17]:
plot()
print_sysinfo()


Python version  : 3.4.1
compiler        : GCC 4.2.1 (Apple Inc. build 5577)

system     : Darwin
release    : 13.2.0
machine    : x86_64
processor  : i386
CPU count  : 4
interpreter: 64bit