# Typed Memoryview Benchmark

This contains the implementations of the benchmarks described at http://jakevdp.github.com/blog/2012/08/08/memoryview-benchmarks.

Here we'll use ipython's cython magic to compile and run the benchmarks.

``````

In [25]:

# Define our test array
import numpy as np
X = np.random.random((500, 3))

``````
``````

``````

## Python-only Version

``````

In [3]:

import numpy as np

def euclidean_distance(x1, x2):
x1 = np.asarray(x1)
x2 = np.asarray(x2)
return np.sqrt(np.sum((x1 - x2) ** 2))

def pairwise_v1(X, metric=euclidean_distance):
X = np.asarray(X)

n_samples, n_dim = X.shape

D = np.empty((n_samples, n_samples))

for i in range(n_samples):
for j in range(n_samples):
D[i, j] = metric(X[i], X[j])

return D

``````
``````

In [4]:

%timeit pairwise_v1(X)

``````
``````

1 loops, best of 3: 3.09 s per loop

``````

## Cython + numpy

``````

In [4]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(np.ndarray, np.ndarray)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(np.ndarray[double, ndim=1, mode='c'] x1,
np.ndarray[double, ndim=1, mode='c'] x2):
cdef double tmp, d
cdef np.intp_t i, N

d = 0
N = x1.shape[0]
# assume x2 has the same shape as x1.  This could be dangerous!

for i in range(N):
tmp = x1[i] - x2[i]
d += tmp * tmp

return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None,
metric = 'euclidean'):
cdef metric_ptr dist_func
if metric == 'euclidean':
dist_func = &euclidean_distance
else:
raise ValueError("unrecognized metric")

cdef np.intp_t i, j, n_samples
n_samples = X.shape[0]

cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n_samples,
n_samples))
for i in range(n_samples):
for j in range(n_samples):
D[i, j] = dist_func(X[i], X[j])

return D

``````
``````

In [5]:

%timeit pairwise_v2(X)

``````
``````

1 loops, best of 3: 668 ms per loop

``````

## Cython + memviews (with slicing)

``````

In [6]:

%%cython
import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[::1], double[::1])

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[::1] x1,
double[::1] x2):
cdef double tmp, d
cdef np.intp_t i, N

d = 0
N = x1.shape[0]
# assume x2 has the same shape as x1.  This could be dangerous!

for i in range(N):
tmp = x1[i] - x2[i]
d += tmp * tmp

return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v3(double[:, ::1] X not None,
metric = 'euclidean'):
cdef metric_ptr dist_func
if metric == 'euclidean':
dist_func = &euclidean_distance
else:
raise ValueError("unrecognized metric")

cdef np.intp_t i, j, n_samples
n_samples = X.shape[0]

cdef double[:, ::1] D = np.empty((n_samples, n_samples))

for i in range(n_samples):
for j in range(n_samples):
D[i, j] = dist_func(X[i], X[j])

return D

``````
``````

In [7]:

%timeit pairwise_v3(X)

``````
``````

10 loops, best of 3: 22 ms per loop

``````

## Cython + raw pointers

``````

In [8]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double*, double*, int)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double* x1,
double* x2,
int N):
cdef double tmp, d
cdef np.intp_t i

d = 0

for i in range(N):
tmp = x1[i] - x2[i]
d += tmp * tmp

return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v4(double[:, ::1] X not None,
metric = 'euclidean'):
cdef metric_ptr dist_func
if metric == 'euclidean':
dist_func = &euclidean_distance
else:
raise ValueError("unrecognized metric")

cdef np.intp_t i, j, n_samples, n_dim
n_samples = X.shape[0]
n_dim = X.shape[1]

cdef double[:, ::1] D = np.empty((n_samples, n_samples))

cdef double* Dptr = &D[0, 0]
cdef double* Xptr = &X[0, 0]

for i in range(n_samples):
for j in range(n_samples):
Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim,
Xptr + j * n_dim,
n_dim)
return D

``````
``````

In [9]:

%timeit pairwise_v4(X)

``````
``````

100 loops, best of 3: 2.44 ms per loop

``````

## Cython + memviews (no slicing)

``````

In [14]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[:, ::1] X,
np.intp_t i1, np.intp_t i2):
cdef double tmp, d
cdef np.intp_t j

d = 0

for j in range(X.shape[1]):
tmp = X[i1, j] - X[i2, j]
d += tmp * tmp

return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v5(double[:, ::1] X not None,
metric = 'euclidean'):
cdef metric_ptr dist_func
if metric == 'euclidean':
dist_func = &euclidean_distance
else:
raise ValueError("unrecognized metric")

cdef np.intp_t i, j, n_samples, n_dim
n_samples = X.shape[0]
n_dim = X.shape[1]

cdef double[:, ::1] D = np.empty((n_samples, n_samples))

for i in range(n_samples):
for j in range(n_samples):
D[i, j] = dist_func(X, i, j)

return D

``````
``````

In [28]:

%timeit pairwise_v5(X)

``````
``````

1000 loops, best of 3: 1.64 ms per loop

``````

# Cython + memviews + OpenMP

``````

In [26]:

from cython.parallel import prange
cimport numpy as np
from libc.math cimport sqrt
cimport cython

import numpy as np

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t) nogil

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[:, ::1] X,
np.intp_t i1, np.intp_t i2) nogil:
cdef double tmp, d
cdef np.intp_t j

d = 0

for j in range(X.shape[1]):
tmp = X[i1, j] - X[i2, j]
d += tmp * tmp

return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v6(double[:, ::1] X not None,
metric = 'euclidean'):
cdef metric_ptr dist_func
if metric == 'euclidean':
dist_func = &euclidean_distance
else:
raise ValueError("unrecognized metric")

cdef np.intp_t i, j, n_samples, n_dim
n_samples = X.shape[0]
n_dim = X.shape[1]

cdef double[:, ::1] D = np.empty((n_samples, n_samples))

for i in prange(n_samples, nogil=True, schedule='guided'):
for j in range(n_samples):
D[i, j] = dist_func(X, i, j)

return D

``````
``````

In [27]:

%timeit pairwise_v6(X)

``````
``````

100 loops, best of 3: 2.19 ms per loop

``````

## Other Similar Routines

Here we'll compare the benchmark to two similar routines from `scipy` and `scikit-learn`

``````

In [12]:

from scipy.spatial.distance import cdist
%timeit cdist(X, X)

``````
``````

100 loops, best of 3: 3.27 ms per loop

``````
``````

In [13]:

from sklearn.metrics import pairwise_distances
%timeit pairwise_distances(X, X)

``````
``````

100 loops, best of 3: 13 ms per loop

``````