In [1]:
import numpy as np
import numba
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor
from joblib import Parallel, delayed
import matplotlib.pyplot as plt

In [2]:
def in_unit_circle(x, y):
    # Indicator: 1 if the point (x, y) lies inside the unit circle, 0 otherwise
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0

In [3]:
@numba.vectorize('int64(float64, float64)', target='cpu')
def in_unit_circle_serial(x, y):
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0

In [4]:
@numba.vectorize('int64(float64, float64)', target='parallel')
def in_unit_circle_multicore(x, y):
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0
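
The only difference between these two ufuncs is the target keyword: target='cpu' compiles a single-threaded ufunc, while target='parallel' spreads the element-wise loop across the available cores. As a rough sanity check (a hedged aside; the attribute below is a numba configuration value, and its default depends on your numba version and machine), you can inspect how many threads the parallel target will use:

numba.config.NUMBA_NUM_THREADS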

In [5]:
n = int(1e7)
xs, ys = np.random.random((2, n))

In [6]:
%%time
4 * np.sum(in_unit_circle(x, y) for x, y in zip(xs, ys))/n


CPU times: user 13.9 s, sys: 65.3 ms, total: 14 s
Wall time: 13.9 s
Out[6]:
3.1416524

In [7]:
%%time
4 * np.sum(in_unit_circle_serial(xs, ys))/n


CPU times: user 91.9 ms, sys: 63.6 ms, total: 156 ms
Wall time: 152 ms
Out[7]:
3.1416523999999999

In [8]:
%%time
4 * np.sum(in_unit_circle_multicore(xs, ys))/n


CPU times: user 231 ms, sys: 25.9 ms, total: 257 ms
Wall time: 97.9 ms
Out[8]:
3.1416523999999999
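
For comparison, the same estimate can be written as a single vectorized NumPy expression (a minimal sketch using the xs and ys arrays above; it avoids the per-element Python calls entirely, but is still single-threaded):

4 * np.mean(xs**2 + ys**2 < 1)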

Multi-core processing


In [9]:
def plot_one(data, name):
    # Scatter plot one (n, 2) array of points and save it as <name>.png
    xs, ys = data.T
    plt.scatter(xs, ys, s=1, edgecolor=None)
    plt.savefig('%s.png' % name)
    return name

In [10]:
data = np.random.random((10, 10000, 2))

Single core


In [11]:
%%time

for i, M in enumerate(data):
    plot_one(M, i)


CPU times: user 2.18 s, sys: 54.6 ms, total: 2.23 s
Wall time: 2.21 s

Threads

%%time
args = [(x, i) for i, x in enumerate(data)]

def plot_one_(arg):
    return plot_one(*arg)

with ThreadPoolExecutor() as pool:
    pool.map(plot_one_, args)
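
Note that Executor.map returns a lazy iterator, so if you want the names returned by plot_one you need to materialize it (the tasks themselves still run and finish before the with block exits). For CPU-bound Python code, threads are also limited by the GIL. A minimal sketch reusing plot_one_ and args from the cell above:

with ThreadPoolExecutor() as pool:
    names = list(pool.map(plot_one_, args))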

Processes


In [12]:
%%time
args = [(x, i) for i, x in enumerate(data)]

with mp.Pool() as pool:
    pool.starmap(plot_one, args)


CPU times: user 24.1 ms, sys: 61.6 ms, total: 85.7 ms
Wall time: 693 ms
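
If this code is moved from the notebook into a script, the pool creation should be guarded so it only runs in the main process (a standard sketch; required on platforms that start workers with spawn, such as Windows, where the module is re-imported in each worker):

if __name__ == '__main__':
    args = [(x, i) for i, x in enumerate(data)]
    with mp.Pool() as pool:
        pool.starmap(plot_one, args)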

In [13]:
%%time
args = [(x, i) for i, x in enumerate(data)]

with mp.Pool() as pool:
    results = pool.starmap_async(plot_one, args)


CPU times: user 16.1 ms, sys: 52.8 ms, total: 68.9 ms
Wall time: 161 ms

Parallel comprehensions with joblib


In [14]:
%%time

Parallel(n_jobs=-1)(delayed(plot_one)(x, i) for i, x in enumerate(data))
pass


CPU times: user 99.6 ms, sys: 67 ms, total: 167 ms
Wall time: 770 ms
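
joblib uses a process-based backend by default; the same comprehension can be run with threads instead by passing prefer='threads' (a hedged sketch, assuming a reasonably recent joblib), which avoids pickling the arrays at the cost of being subject to the GIL:

Parallel(n_jobs=-1, prefer='threads')(delayed(plot_one)(x, i) for i, x in enumerate(data))
pass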

Blocking and non-blocking calls


In [15]:
def f(x):
    import time

    # Simulate a task of unpredictable duration (0-4 seconds), then return its argument
    time.sleep(np.random.randint(0, 5))
    return x

In [16]:
%%time

with mp.Pool(processes=4) as pool:
    result = pool.map(f, range(10))


CPU times: user 16.6 ms, sys: 49 ms, total: 65.5 ms
Wall time: 5.09 s

In [17]:
result


Out[17]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [18]:
%%time

pool = mp.Pool(processes=4)
result = pool.map_async(f, range(10))


CPU times: user 8.96 ms, sys: 28.1 ms, total: 37 ms
Wall time: 31.7 ms

In [20]:
if result.ready() and result.successful():
    print(result.get())
else:
    print(result.wait())


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
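
If the values from the non-blocking call are needed, AsyncResult.get blocks until they are ready (optionally raising on a timeout), and since this pool was created without a with block it should eventually be shut down explicitly. A minimal sketch (the 10-second timeout is an arbitrary choice):

result.get(timeout=10)  # blocks until the workers finish, or raises multiprocessing.TimeoutError
pool.close()            # no further tasks will be submitted
pool.join()             # wait for the worker processes to exit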