In [ ]:

    
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

Data Generation



In [ ]:

    
np.random.seed(10)
p, q = (np.random.rand(i, 2) for i in (4, 5))
p_big, q_big = (np.random.rand(i, 80) for i in (100, 120))

print(p, "\n\n", q)

Solution



In [ ]:

    
def naive(p, q):
    ''' fill your code in here...
    '''

Use matching indices

Instead of iterating through indices, one can use them directly to parallelize the operations with Numpy.



In [ ]:

    
rows, cols = np.indices((p.shape[0], q.shape[0]))
print(rows, end='\n\n')
print(cols)



In [ ]:

    
print(p[rows.ravel()], end='\n\n')
print(q[cols.ravel()])



In [ ]:

    
def with_indices(p, q):
    ''' fill your code in here...
    '''

Use a library

scipy is the equivalent of matlab toolboxes and have a lot to offer. Actually the pairwise computation is part of the library through the spatial module.



In [ ]:

    
from scipy.spatial.distance import cdist

def scipy_version(p, q):
    return cdist(p, q)

Numpy Magic



In [ ]:

    
def tensor_broadcasting(p, q):
    return np.sqrt(np.sum((p[:,np.newaxis,:]-q[np.newaxis,:,:])**2, axis=2))

Compare methods



In [ ]:

    
methods = [naive, with_indices, scipy_version, tensor_broadcasting]
timers = []
for f in methods:
    r = %timeit -o f(p_big, q_big)
    timers.append(r)



In [ ]:

    
plt.figure(figsize=(10,6))
plt.bar(np.arange(len(methods)), [r.best*1000 for r in timers], log=False)  # Set log to True for logarithmic scale
plt.xticks(np.arange(len(methods))+0.2, [f.__name__ for f in methods], rotation=30)
plt.xlabel('Method')
plt.ylabel('Time (ms)')



In [ ]: