In [1]:
from platform import python_version
print "Python", python_version()
I don't want to get into the details about sorting algorithms here, but there is a great report
"Sorting in the Presence of Branch Prediction and Caches - Fast Sorting on Modern Computers" written by Paul Biggar and David Gregg, where they describe and analyze elementary sorting algorithms in very nice detail (see chapter 4).
And for a quick reference, this website has a nice animation of this algorithm.
A long story short: The "worst-case" complexity of the Bubblesort algorithm (i.e., "Big-O")
$\Rightarrow \pmb O(n^2)$
Maybe we can speed things up a little bit via Cython's C-extensions for Python. Cython is basically a hybrid between C and Python and can be pictured as compiled Python code with type declarations.
Since we are working in an IPython notebook here, we can make use of the very convenient IPython magic: It will take care of the conversion to C code, the compilation, and eventually the loading of the function.
Note that the static type declarations that we add via cdef
are note required for Cython to work, but it will speed things up tremendously.
In [2]:
%load_ext cythonmagic
In [3]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef cython_bubblesort1(long[:] np_ary):
""" The Cython implementation of Bubblesort with NumPy memoryview."""
cdef unsigned long length, i, swapped, ele, temp
length = np_ary.shape[0]
swapped = 1
for i in xrange(0, length):
if swapped:
swapped = 0
for ele in xrange(0, length-i-1):
if np_ary[ele] > np_ary[ele + 1]:
temp = np_ary[ele + 1]
np_ary[ele + 1] = np_ary[ele]
np_ary[ele] = temp
swapped = 1
return np.asarray(np_ary)
In [4]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef cython_bubblesort2(inp_ary):
""" The Cython implementation of Bubblesort with NumPy memoryview."""
cdef unsigned long length, i, swapped, ele, temp
cdef long[:] np_ary = inp_ary
length = np_ary.shape[0]
swapped = 1
for i in xrange(0, length):
if swapped:
swapped = 0
for ele in xrange(0, length-i-1):
if np_ary[ele] > np_ary[ele + 1]:
temp = np_ary[ele + 1]
np_ary[ele + 1] = np_ary[ele]
np_ary[ele] = temp
swapped = 1
return inp_ary
In [13]:
import timeit
import random
import copy
import numpy as np
random.seed(4567)
funcs = ['cython_bubblesort1',
'cython_bubblesort2',
]
orders_n = [10**n for n in range(1, 6)]
timings = {f:[] for f in funcs}
for n in orders_n:
l = [np.random.randint(n) for num in range(n)]
for f in funcs:
l_copy = np.asarray(copy.deepcopy(l))
timings[f].append(min(timeit.Timer('%s(l_copy)' %f,
'from __main__ import %s, l_copy' %f)
.repeat(repeat=3, number=10)))
In [14]:
import platform as pf
from cython import __version__ as cython__version__
from numba import __version__ as numba__version__
from llvm import __version__ as llvm__version__
from parakeet import __version__ as parakeet__version__
sys_info = [
['Python version', pf.python_version()],
['Compiler', pf.python_compiler()],
['Cython version', cython__version__],
['NumPy version', np.__version__]
]
In [15]:
%matplotlib inline
In [16]:
import matplotlib.pyplot as plt
def plot(timings, title, ranked_labels, labels, orders_n, sys_info):
plt.rcParams.update({'font.size': 12})
fig = plt.figure(figsize=(11,10))
for lb in ranked_labels:
plt.plot(orders_n, timings[lb], alpha=0.5, label=labels[lb],
marker='o', lw=3)
plt.xlabel('sample size n (items in the list)')
plt.ylabel('time per computation in milliseconds')
plt.xlim([min(orders_n) / 10, max(orders_n)* 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title(title)
ftext = "".join([" ".join(entry)+'\n' for entry in sys_info])
plt.figtext(.14,.55, ftext, fontsize=11, ha='left')
plt.show()
In [17]:
import prettytable
def summary_table(ranked_labels):
fit_table = prettytable.PrettyTable(['n=%s' %orders_n[-1],
'bubblesort function' ,
'time in millisec.',
'rel. performance gain'])
fit_table.align['bubblesort function'] = 'l'
for entry in ranked_labels:
fit_table.add_row(['', labels[entry[1]], round(entry[0]*100, 3),
round(ranked_labels[0][0]/entry[0], 2)])
# times 100 for converting from seconds to milliseconds: (time*1000 / 10-loops)
print(fit_table)
In [18]:
title = 'Performance of Bubblesort in Cython'
labels = {
'cython_bubblesort1': 'cython_bubblesort1(long[:] np_ary)',
'cython_bubblesort2': 'cpdef cython_bubblesort2(inp_ary)',
}
ranked_by_time = sorted([(time[1][-1],time[0]) for time in timings.items()], reverse=True)
plot(timings, title, [l for t,l in ranked_by_time], labels, orders_n, sys_info)
summary_table(ranked_by_time)