Jake VanderPlas's benchmarks: http://jakevdp.github.io/blog/2013/06/15/numba-vs-cython-take-2/
Out of the box, WinPython builds Cython code with the MinGW compiler.
To switch to Visual Studio (if you have it installed), run the following in an IPython cell:
!start cmd /C %WINPYDIR%..\scripts\make_cython_use_vc.bat
To switch back to MinGW, run the following in an IPython cell:
!start cmd /C %WINPYDIR%..\scripts\make_cython_use_mingw.bat
In [1]:
# Thanks to Preliminary, we are ready to use cython
%matplotlib inline
%load_ext cythonmagic
In [2]:
%%cython
# Smoke test: the whole cell is handed to the Cython cell magic, so seeing the
# sum printed means the extension loaded and the compiler toolchain works.
print (2+17)
In [3]:
# Echo the Windows environment variable %winpyver% through the shell
# (presumably the WinPython version string -- confirm on the target install).
!echo %winpyver%
In [4]:
# Prepare the sample data: 1000 random points in 3 dimensions.
import numpy as np

# Seed the global generator so that "Restart & Run All" benchmarks exactly the
# same input on every run.
np.random.seed(0)
X = np.random.random((1000, 3))
In [5]:
# evaluate numpy speed
def pairwise_numpy(X):
    """Return the Euclidean distance between every pair of rows of ``X``.

    Uses NumPy broadcasting: inserting a new axis makes the subtraction
    produce every row-minus-row difference at once. For a 2-D ``X`` of shape
    ``(M, N)`` the result has shape ``(M, M)``.
    """
    deltas = X[:, np.newaxis, :] - X
    squared_distances = np.sum(deltas ** 2, axis=-1)
    return np.sqrt(squared_distances)
%timeit pairwise_numpy(X)
In [6]:
# evaluate pure python interpreter speed
def pairwise_python(X):
    """Return the Euclidean distance between every pair of rows of ``X``.

    Deliberately written with explicit Python loops: this is the baseline for
    the interpreter-speed measurement and the function handed to numba.

    Parameters
    ----------
    X : 2-D array indexed as ``X[i, k]``; ``X.shape`` is read as ``(M, N)``.

    Returns
    -------
    D : float64 array of shape ``(M, M)`` with ``D[i, j]`` the distance
        between rows ``i`` and ``j``.
    """
    M = X.shape[0]
    N = X.shape[1]
    # np.float was only an alias of the builtin float and is gone from current
    # NumPy; np.float64 is the same dtype and works on every version.
    D = np.empty((M, M), dtype=np.float64)
    for i in range(M):
        for j in range(M):
            d = 0.0
            for k in range(N):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = np.sqrt(d)
    return D
%timeit pairwise_python(X)
In [7]:
%%cython
import numpy as np
cimport cython
from libc.math cimport sqrt


# Bounds checking and negative-index wraparound are switched off for speed;
# every index below stays within [0, M) / [0, N), so this is safe.
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_cython(double[:, ::1] X):
    """Return the Euclidean distance between every pair of rows of ``X``.

    ``X`` must be a C-contiguous 2-D buffer of doubles (enforced by the typed
    memoryview signature). The result is a new ``(M, M)`` float64 NumPy array,
    where ``M`` is the number of rows of ``X``.
    """
    # Py_ssize_t is the native index type for memoryview shapes and indices.
    cdef Py_ssize_t M = X.shape[0]
    cdef Py_ssize_t N = X.shape[1]
    # Type the loop counters explicitly so the loops compile to plain C loops
    # without depending on Cython's type inference.
    cdef Py_ssize_t i, j, k
    cdef double tmp, d
    cdef double[:, ::1] D = np.empty((M, M), dtype=np.float64)
    for i in range(M):
        for j in range(M):
            d = 0.0
            for k in range(N):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = sqrt(d)
    # Hand back a regular ndarray rather than the memoryview object.
    return np.asarray(D)
In [8]:
# evaluate cython speed
%timeit pairwise_cython(X)
In [9]:
# numba
# `numba.decorators` and `autojit` no longer exist in numba. Calling `jit`
# with just the function gives the same behaviour autojit had: compilation is
# deferred until the first call and specialised on the argument types seen.
from numba import double
from numba import jit
pairwise_numba = jit(pairwise_python)
%timeit pairwise_numba(X)
In [10]:
from scipy.spatial.distance import cdist
%timeit cdist(X, X)
In [12]:
from sklearn.metrics import euclidean_distances
%timeit euclidean_distances(X, X)
In [13]:
%matplotlib inline
In [18]:
import matplotlib.pyplot as plt

labels = ['python\nloop', 'numpy\nbroadc.', 'sklearn', 'scipy', 'cython', 'numba']
# Hand-entered timings in seconds, one entry per label above.
timings_linux = [13.4 , 0.111 , 0.0356 , 0.0129 , 0.00987 , 0.00912]
timings_winpython = [7.22, 0.110 , 0.0295 , 0.0241 , 0.0175 , 0.0728]
timings_winpython64 = [6.83, 0.0848 , 0.0263 , 0.0172 , 0.0171 , 0.0234]

x = np.arange(len(labels))
fig, ax = plt.subplots()
ax.set_yscale('log')

# The three series are drawn on top of each other with decreasing width and
# partial transparency. align='edge' pins the left edge at x - 0.3, which is
# what this code relied on when 'edge' was still matplotlib's default.
ax.bar(x - 0.3, timings_linux, width=0.8, alpha=0.4, bottom=1E-6,
       align='edge', color='yellow', label='linux')
# The colour is spelled out so the "32bit= blue" in the title holds whatever
# the active style's default colour is.
ax.bar(x - 0.3, timings_winpython, width=0.7, alpha=0.4, bottom=1E-6,
       align='edge', color='blue', label='winpython 32bit')
ax.bar(x - 0.3, timings_winpython64, width=0.6, alpha=0.4, bottom=1E-6,
       align='edge', color='red', label='winpython 64bit')

ax.grid()
# Fixed tick positions with fixed labels; no formatter that indexes `labels`
# with whatever tick value matplotlib happens to pass in.
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlim(-0.5, len(labels) - 0.5)
ax.set_ylim(1E-3, 1E2)
ax.set_ylabel('time (s)')
ax.set_title("Pairwise Distance Timings on Linux and Windows (64bit = red, 32bit= blue, linux=yellow)")
# Trailing semicolon keeps the cell from echoing the Legend object's repr.
ax.legend();
Out[18]:
In [ ]: