In [1]:
# following: http://nbviewer.ipython.org/url/jakevdp.github.com/downloads/notebooks/memview_bench.ipynb

In [1]:
%load_ext cythonmagic

In [2]:
import numpy as np
import numpy.random as npr
from numpy.linalg import det

In [23]:
X = npr.randn(1000,3)

In [24]:
## 0. Python-only version

def euclidean(x1,x2):
    return np.sqrt(sum((x1-x2)**2))

def pairwise_v1(X,metric=euclidean):
    n,dim = X.shape
    D = np.zeros((n,n))
    
    for i in range(n):
        for j in range(n):
            D[i,j] = metric(X[i],X[j])
    return D

In [25]:
%timeit pairwise_v1(X)


1 loops, best of 3: 8.13 s per loop

In [28]:
%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# a function pointer to a metric
ctypedef double (*metric_ptr)(np.ndarray,np.ndarray)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean(np.ndarray[double,ndim=1,mode='c'] x1,
                      np.ndarray[double,ndim=1,mode='c'] x2):
    cdef double tmp,d
    cdef np.intp_t i,N
    
    d = 0
    N = x1.shape[0]
    
    for i in range(N):
        tmp = x1[i] - x2[i]
        d+=tmp*tmp
    return sqrt(d)

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n
    n = X.shape[0]

    cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n,
                                                            n))
    for i in range(n):
        for j in range(n):
            D[i, j] = dist_func(X[i], X[j])

    return D

In [29]:
%timeit pairwise_v1(X)


1 loops, best of 3: 8.15 s per loop

In [30]:
%timeit pairwise_v2(X)


1 loops, best of 3: 2.26 s per loop

In [31]:
%%cython
import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

ctypedef double (*metric_ptr)(double[::1], double[::1])

cdef double euclidean(double[::1] x1,
                      double[::1] x2):
    cdef double tmp,d
    cdef np.intp_t i,N
    
    d = 0
    N = x1.shape[0]
    
    for i in range(N):
        tmp = x1[i] - x2[i]
        d+=tmp*tmp
    return sqrt(d)


def pairwise_v3(double[:, ::1] X not None,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = dist_func(X[i], X[j])

    return D

In [32]:
%timeit pairwise_v3(X)


10 loops, best of 3: 44.7 ms per loop

In [33]:
%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double*, double*, int)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double* x1,
                               double* x2,
                               int N):
    cdef double tmp, d
    cdef np.intp_t i

    d = 0

    for i in range(N):
        tmp = x1[i] - x2[i]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v4(double[:, ::1] X not None,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples, n_dim
    n_samples = X.shape[0]
    n_dim = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0]

    for i in range(n_samples):
        for j in range(n_samples):
            Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim,
                                                Xptr + j * n_dim,
                                                n_dim)
    return D

In [34]:
%timeit pairwise_v4(X)


100 loops, best of 3: 4.68 ms per loop

In [26]:
from scipy.spatial.distance import pdist

In [27]:
%timeit pdist(X)


100 loops, best of 3: 2.5 ms per loop

In [35]:
X = npr.randn(500,100,3)

In [36]:
from numpy.linalg import det
BC = lambda X,Y: det(X.T.dot(Y))/ np.sqrt(det(X.T.dot(X)) * det(Y.T.dot(Y)))

In [39]:
BC(X[0],X[10])


Out[39]:
0.00097428727055857804

In [40]:
def pairwise_bc_v1(X):
    bc = np.zeros((len(X),len(X)))
    for i in range(len(X)):
        for j in range(len(X)):
            bc[i,j] = BC(X[i],X[j])
    return bc

In [ ]:


In [ ]:


In [42]:
%timeit pairwise_bc_v1(X)


1 loops, best of 3: 13 s per loop

In [ ]:
%%cython
import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

ctypedef double (*metric_ptr)(double[::2], double[::2])

cdef double euclidean(double[::2] x1,
                      double[::2] x2):
    cdef double tmp,d
    cdef np.intp_t i,N
    
    d = 0
    N = x1.shape[0]
    
    for i in range(N):
        tmp = x1[i] - x2[i]
        d+=tmp*tmp
    return sqrt(d)


def pairwise_v3(double[:, ::1] X not None,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = dist_func(X[i], X[j])

    return D

In [50]:
def myloop(X):
    s=0
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            for k in range(X.shape[2]):
                s+=X[i,j,k]
    return s

In [53]:
%%cython
import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

def myloop_c(np.ndarray[np.double_t,ndim=3] X):
    cdef int i,j,k
    cdef double s
    s=0
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            for k in range(X.shape[2]):
                s+=X[i,j,k]
    return s

In [48]:
X.shape


Out[48]:
(500, 100, 3)

In [52]:
%timeit myloop(X)


10 loops, best of 3: 110 ms per loop

In [54]:
%timeit myloop_c(X)


1000 loops, best of 3: 239 µs per loop

In [63]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

def mymat_c(np.ndarray[np.double_t,ndim=3] X):
    cdef int i,j,k
    cdef double s
    s = 0
    
    
    
    return s

In [64]:
mymat_c(X)


Out[64]:
0.0

In [62]:
X.shape


Out[62]:
(500, 100, 3)

In [66]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

def mymat_c(np.ndarray[np.double_t,ndim=2] X):
    cdef int i,j,k
    cdef double s
    s = 0
    
    s += det(X)
    
    return s

In [69]:
X[0].shape


Out[69]:
(100, 3)

In [76]:
sq = npr.randn(100,100)

In [77]:
mymat_c(sq)


Out[77]:
1.5574946108189126e+76

In [86]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

def binet_cauchy_c(np.ndarray[np.double_t,ndim=2] X,
                   np.ndarray[np.double_t,ndim=2] Y):
    
    return det(X.T.dot(Y))/ sqrt(det(X.T.dot(X)) * det(Y.T.dot(Y)))

In [73]:
binet_cauchy_p = lambda X,Y: det(X.T.dot(Y))/ np.sqrt(det(X.T.dot(X)) * det(Y.T.dot(Y)))

In [78]:
%timeit binet_cauchy_p(sq,sq)


1000 loops, best of 3: 662 µs per loop

In [79]:
%timeit binet_cauchy_c(sq,sq)


1000 loops, best of 3: 616 µs per loop

In [ ]:


In [89]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

cdef double binet_cauchy_c(np.ndarray[np.double_t,ndim=2] X,
                   np.ndarray[np.double_t,ndim=2] Y):
    
    return det(X.T.dot(Y))/ sqrt(det(X.T.dot(X)) * det(Y.T.dot(Y)))

def pairwise_bc_c(np.ndarray[np.double_t,ndim=3] X):
    n_samples = X.shape[0]
    cdef double[:, ::1] D = np.empty((n_samples, n_samples))
    cdef int i,j
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
    return D

# pointers
def pairwise_bc_pointers(double[:,:,::1] X):
    cdef int n_samples,mat_size
    n_samples = X.shape[0]
    mat_size = X.shape[1]*X.shape[2]
    cdef double[:, ::1] D = np.empty((n_samples, n_samples))
    cdef int i,j
    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0, 0]
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            #D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
            Dptr[i*n_samples+j] = binet_cauchy_c(Xptr+i*mat_size,
                                              Xptr+j*mat_size)
    return D


Error compiling Cython file:
------------------------------------------------------------
...
    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0, 0]
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            #D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
            Dptr[i*n_samples+j] = pairwise_bc(Xptr+i*mat_size,
                                            ^
------------------------------------------------------------

/Users/joshuafass/.ipython/cython/_cython_magic_57aa37c15d80b7bc1ad7aebcebadc62b.pyx:34:45: undeclared name not builtin: pairwise_bc

Error compiling Cython file:
------------------------------------------------------------
...
    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0, 0]
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            #D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
            Dptr[i*n_samples+j] = pairwise_bc(Xptr+i*mat_size,
                                                 ^
------------------------------------------------------------

/Users/joshuafass/.ipython/cython/_cython_magic_57aa37c15d80b7bc1ad7aebcebadc62b.pyx:34:50: Cannot convert 'double *' to Python object

Error compiling Cython file:
------------------------------------------------------------
...
    cdef double* Xptr = &X[0, 0, 0]
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            #D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
            Dptr[i*n_samples+j] = pairwise_bc(Xptr+i*mat_size,
                                              Xptr+j*mat_size)
                                                 ^
------------------------------------------------------------

/Users/joshuafass/.ipython/cython/_cython_magic_57aa37c15d80b7bc1ad7aebcebadc62b.pyx:35:50: Cannot convert 'double *' to Python object

In [87]:
%timeit binet_cauchy_c(X[0],X[-1])


10000 loops, best of 3: 50.6 µs per loop

In [83]:
%timeit pairwise_bc(X)


1 loops, best of 3: 13.9 s per loop

In [84]:
def pairwise_bc_p(X):
    n_samples = X.shape[0]
    D = np.empty((n_samples, n_samples))
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            D[i,j] = binet_cauchy_c(X[i,:,:],X[j,:,:])
    return D

In [85]:
%timeit pairwise_bc_p(X)


1 loops, best of 3: 14.2 s per loop

In [82]:
X.shape


Out[82]:
(500, 100, 3)

In [105]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double distance(double[:,::1] X,
                     np.intp_t i1,
                     np.intp_t i2):
    cdef double tmp,d
    cdef np.intp_t j
    d = 0
    
    for j in range(X.shape[1]):
        tmp = X[i1,j] - X[i2,j]
        d+= tmp*tmp
    return sqrt(d)
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_no_slice(double[:,::1] X not None):
    cdef np.intp_t i,j,n,dim
    n=X.shape[0]
    dim = X.shape[1]
    cdef double[:,::1] D = np.empty((n,n))
    for i in range(n):
        for j in range(n):
            D[i,j] = distance(X,i,j)
    return D

In [106]:
X = npr.randn(1000,3)

In [107]:
%timeit pairwise_no_slice(X)


100 loops, best of 3: 4.83 ms per loop

In [98]:
%timeit pdist(X)


100 loops, best of 3: 3.25 ms per loop

In [103]:
%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[:, ::1] X,
                               np.intp_t i1, np.intp_t i2):
    cdef double tmp, d
    cdef np.intp_t j

    d = 0

    for j in range(X.shape[1]):
        tmp = X[i1, j] - X[i2, j]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v5(double[:, ::1] X not None):
    cdef np.intp_t i, j, n_samples, n_dim
    n_samples = X.shape[0]
    n_dim = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = euclidean_distance(X, i, j)

    return D

In [104]:
%timeit pairwise_v5(X)


100 loops, best of 3: 4.77 ms per loop

In [ ]:


In [7]:
%%cython
import numpy as np
from numpy.linalg import det

cimport numpy as np
from libc.math cimport sqrt
cimport cython

cdef double bc(double[:,:,:] X,
                     np.intp_t i1,
                     np.intp_t i2):
    return sum(X[i1])
    #return det(X[i1].T.dot(X[i2]))/ sqrt(det(X[i1].T.dot(X[i1])) * det(X[i2].T.dot(X[i2])))

def pairwise_bc_no_slice(double[:,:,:] X not None):
    cdef np.intp_t i,j,n
    n=X.shape[0]
    cdef np.ndarray D = np.zeros((n,n),dtype=np.double)
    for i in range(n):
        for j in range(n):
            D[i,j] = bc(X,i,j)
    return D

In [8]:
X = npr.randn(500,10,3)

In [20]:



Out[20]:
<MemoryView of 'array' at 0x10d169300>

In [ ]: