In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

Data Generation


In [ ]:
np.random.seed(10)
p, q = (np.random.rand(i, 2) for i in (4, 5))
p_big, q_big = (np.random.rand(i, 80) for i in (100, 120))

print(p, "\n\n", q)

Solution


In [ ]:
def naive(p, q):
    ''' fill your code in here...
    '''

Use matching indices

Instead of iterating through indices, one can use them directly to parallelize the operations with Numpy.


In [ ]:
rows, cols = np.indices((p.shape[0], q.shape[0]))
print(rows, end='\n\n')
print(cols)

In [ ]:
print(p[rows.ravel()], end='\n\n')
print(q[cols.ravel()])

In [ ]:
def with_indices(p, q):
    ''' fill your code in here...
    '''

Use a library

scipy is the equivalent of matlab toolboxes and have a lot to offer. Actually the pairwise computation is part of the library through the spatial module.


In [ ]:
from scipy.spatial.distance import cdist

def scipy_version(p, q):
    return cdist(p, q)

Numpy Magic


In [ ]:
def tensor_broadcasting(p, q):
    return np.sqrt(np.sum((p[:,np.newaxis,:]-q[np.newaxis,:,:])**2, axis=2))

Compare methods


In [ ]:
methods = [naive, with_indices, scipy_version, tensor_broadcasting]
timers = []
for f in methods:
    r = %timeit -o f(p_big, q_big)
    timers.append(r)

In [ ]:
plt.figure(figsize=(10,6))
plt.bar(np.arange(len(methods)), [r.best*1000 for r in timers], log=False)  # Set log to True for logarithmic scale
plt.xticks(np.arange(len(methods))+0.2, [f.__name__ for f in methods], rotation=30)
plt.xlabel('Method')
plt.ylabel('Time (ms)')

In [ ]: