In [1]:
import numpy as np
import numba
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor
from joblib import Parallel, delayed
import matplotlib.pyplot as plt

In [2]:
def in_unit_circle(x, y):
    # Indicator: 1 if the point (x, y) lies inside the unit circle, 0 otherwise
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0

In [3]:
@numba.vectorize('int64(float64, float64)', target='cpu')
def in_unit_circle_serial(x, y):
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0

In [4]:
@numba.vectorize('int64(float64, float64)', target='parallel')
def in_unit_circle_multicore(x, y):
    if x**2 + y**2 < 1:
        return 1
    else:
        return 0
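
The only difference between these two ufuncs is the target keyword: target='cpu' compiles a single-threaded ufunc, while target='parallel' spreads the element-wise loop across the available cores. As a rough sanity check (a hedged aside; the attribute below is a numba configuration value, and its default depends on your numba version and machine), you can inspect how many threads the parallel target will use:

numba.config.NUMBA_NUM_THREADS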

In [5]:
n = int(1e7)
xs, ys = np.random.random((2, n))

In [6]:
%%time
4 * np.sum(in_unit_circle(x, y) for x, y in zip(xs, ys))/n


CPU times: user 13.9 s, sys: 65.3 ms, total: 14 s
Wall time: 13.9 s
Out[6]:
3.1416524

In [7]:
%%time
4 * np.sum(in_unit_circle_serial(xs, ys))/n


CPU times: user 91.9 ms, sys: 63.6 ms, total: 156 ms
Wall time: 152 ms
Out[7]:
3.1416523999999999

In [8]:
%%time
4 * np.sum(in_unit_circle_multicore(xs, ys))/n


CPU times: user 231 ms, sys: 25.9 ms, total: 257 ms
Wall time: 97.9 ms
Out[8]:
3.1416523999999999
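
For comparison, the same estimate can be written as a single vectorized NumPy expression (a minimal sketch using the xs and ys arrays above; it avoids the per-element Python calls entirely, but is still single-threaded):

4 * np.mean(xs**2 + ys**2 < 1)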

Multi-core processing


In [9]:
def plot_one(data, name):
    # Scatter plot one (n, 2) array of points and save it as <name>.png
    xs, ys = data.T
    plt.scatter(xs, ys, s=1, edgecolor=None)
    plt.savefig('%s.png' % name)
    return name

In [10]:
data = np.random.random((10, 10000, 2))

Single core


In [11]:
%%time

for i, M in enumerate(data):
    plot_one(M, i)


CPU times: user 2.18 s, sys: 54.6 ms, total: 2.23 s
Wall time: 2.21 s

Threads

%%time
args = [(x, i) for i, x in enumerate(data)]

def plot_one_(arg):
    return plot_one(*arg)

with ThreadPoolExecutor() as pool:
    pool.map(plot_one_, args)
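
Note that Executor.map returns a lazy iterator, so if you want the names returned by plot_one you need to materialize it (the tasks themselves still run and finish before the with block exits). For CPU-bound Python code, threads are also limited by the GIL. A minimal sketch reusing plot_one_ and args from the cell above:

with ThreadPoolExecutor() as pool:
    names = list(pool.map(plot_one_, args))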

Processes


In [12]:
%%time
args = [(x, i) for i, x in enumerate(data)]

with mp.Pool() as pool:
    pool.starmap(plot_one, args)


CPU times: user 24.1 ms, sys: 61.6 ms, total: 85.7 ms
Wall time: 693 ms
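
If this code is moved from the notebook into a script, the pool creation should be guarded so it only runs in the main process (a standard sketch; required on platforms that start workers with spawn, such as Windows, where the module is re-imported in each worker):

if __name__ == '__main__':
    args = [(x, i) for i, x in enumerate(data)]
    with mp.Pool() as pool:
        pool.starmap(plot_one, args)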

In [13]:
%%time
args = [(x, i) for i, x in enumerate(data)]

with mp.Pool() as pool:
    results = pool.starmap_async(plot_one, args)


CPU times: user 16.1 ms, sys: 52.8 ms, total: 68.9 ms
Wall time: 161 ms

Parallel comprehensions with joblib


In [14]:
%%time

Parallel(n_jobs=-1)(delayed(plot_one)(x, i) for i, x in enumerate(data))
pass


CPU times: user 99.6 ms, sys: 67 ms, total: 167 ms
Wall time: 770 ms
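
joblib uses a process-based backend by default; the same comprehension can be run with threads instead by passing prefer='threads' (a hedged sketch, assuming a reasonably recent joblib), which avoids pickling the arrays at the cost of being subject to the GIL:

Parallel(n_jobs=-1, prefer='threads')(delayed(plot_one)(x, i) for i, x in enumerate(data))
pass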

Blocking and non-blocking calls


In [15]:
def f(x):
    import time

    # Simulate a task of unpredictable duration (0-4 seconds), then return its argument
    time.sleep(np.random.randint(0, 5))
    return x

In [16]:
%%time

with mp.Pool(processes=4) as pool:
    result = pool.map(f, range(10))


CPU times: user 16.6 ms, sys: 49 ms, total: 65.5 ms
Wall time: 5.09 s

In [17]:
result


Out[17]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [18]:
%%time

pool = mp.Pool(processes=4)
result = pool.map_async(f, range(10))


CPU times: user 8.96 ms, sys: 28.1 ms, total: 37 ms
Wall time: 31.7 ms

In [20]:
if result.ready() and result.successful():
    print(result.get())
else:
    print(result.wait())


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
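
If the values from the non-blocking call are needed, AsyncResult.get blocks until they are ready (optionally raising on a timeout), and since this pool was created without a with block it should eventually be shut down explicitly. A minimal sketch (the 10-second timeout is an arbitrary choice):

result.get(timeout=10)  # blocks until the workers finish, or raises multiprocessing.TimeoutError
pool.close()            # no further tasks will be submitted
pool.join()             # wait for the worker processes to exit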