(after the command: ipcluster start -n 4)
In [2]:
from IPython.parallel import Client
In [2]:
rc = Client()
In [3]:
rc.ids
Out[3]:
In [4]:
px import os
In [5]:
px print os.getpid()
In [6]:
pxconfig --targets 1
In [7]:
px print os.getpid()
In [8]:
%%px --targets 2
print os.getpid()
In [9]:
pxconfig --targets all
In [10]:
px print os.getpid()
In [11]:
%%px --noblock
import time
time.sleep(1)
os.getpid()
Out[11]:
In [12]:
%pxresult
Parallel map
In [13]:
v = rc[:] # get a view to the engines
In [14]:
with v.sync_imports():
import time
In [15]:
def f(x):
    """Return ``x`` squared after pausing for one second.

    The pause makes each call take a noticeable, fixed amount of time, which
    is what the ``map`` timing cells in this notebook measure.  ``time`` is
    resolved as a global when the function runs, so it must already be
    imported wherever the function executes.
    """
    time.sleep(1)
    squared = pow(x, 2)
    return squared
In [16]:
v.map_sync(f, range(10))
Out[16]:
In [17]:
timeit -n 1 -r 1 v.map_sync(f, range(10))
In [18]:
timeit -n 1 -r 1 map(f, range(10))
Asynchronous map:
In [19]:
r = v.map(f, range(10))
In [21]:
r.ready(), r.elapsed
Out[21]:
In [59]:
r.get()
Out[59]:
In [60]:
r.elapsed, r.serial_time
Out[60]:
In [11]:
def sample(n):
    """Monte Carlo estimate of pi from ``n`` random points.

    Draws ``n`` x-coordinates and ``n`` y-coordinates with ``rand`` and
    returns four times the fraction of points satisfying
    ``x**2 + y**2 <= 1``.  ``rand`` is resolved as a global at call time.
    """
    xs = rand(n)
    ys = rand(n)
    inside_circle = (xs**2 + ys**2 <= 1)
    return 4 * inside_circle.sum() / float(n)
In [52]:
n = 1000000
In [53]:
timeit -r 10 -n 1 sample(n)
In [14]:
from IPython.parallel import Client
rc = Client()
v = rc[:]
In [15]:
with v.sync_imports():
from numpy.random import rand
import numpy
In [19]:
timeit -r 10 -n 1 sum(v.map_sync(sample, [n]*4)) / 4 # len(v) = 4
In [165]:
(rand(5) <= 0.5)
Out[165]:
In [65]:
(rand(4)**2 + rand(4)**2 <= 1)
Out[65]:
In [128]:
%load_ext cythonmagic
Pure Python (to compare to Cython, and Cython+NumPy)
In [182]:
import random
In [157]:
def mcpy1(n):
    """Pure-Python Monte Carlo estimate of pi using ``n`` random points.

    Each iteration draws two numbers with ``random.random()`` (x first,
    then y) and counts the point when ``x**2 + y**2 <= 1``.  The result is
    four times the hit fraction.
    """
    hits = 0
    for _ in xrange(n):
        x = random.random()
        y = random.random()
        if x**2 + y**2 <= 1:
            hits += 1
    return 4 * hits / float(n)
In [158]:
n = 1000000
# r1 = [random.random() for k in range(n)]
In [160]:
timeit -r 10 -n 10 mcpy1(n)
Cython
In [1]:
%load_ext cythonmagic
In [29]:
%%cython
from libc.stdlib cimport rand, RAND_MAX
def mcpy2(int n):
    """Cython Monte Carlo estimate of pi using the C library ``rand()``.

    Each iteration scales two ``rand()`` draws by ``RAND_MAX`` (x first,
    then y) and counts the point when ``x**2 + y**2 <= 1``.  Returns four
    times the hit fraction.
    """
    cdef int hits = 0
    cdef int i
    cdef double x, y
    for i in xrange(n):
        x = float(rand()) / RAND_MAX
        y = float(rand()) / RAND_MAX
        if x**2 + y**2 <= 1:
            hits += 1
    return 4 * hits / float(n)
In [30]:
n = 1000000
In [31]:
timeit -r 10 -n 10 mcpy2(n)
In [47]:
mcpy2(n)
Out[47]:
Cython+NumPy (not so different from the pure NumPy Monte Carlo above, because the NumPy types are not declared at the C level here. See the examples further below for how to do that.)
In [171]:
%%cython
import random
import numpy as np
def mcpy3(int n):
    """Monte Carlo estimate of pi using NumPy arrays inside a Cython cell.

    Draws ``n`` x-values and then ``n`` y-values with ``np.random.rand``
    and returns four times the fraction of points with
    ``x**2 + y**2 <= 1``.
    """
    xs = np.random.rand(n)
    ys = np.random.rand(n)
    inside_circle = xs**2 + ys**2 <= 1
    return 4 * np.sum(inside_circle) / float(n)
In [172]:
n = 1000000
In [173]:
timeit -r 10 -n 1 mcpy3(n)
Random-number generation may distort the comparison, so try another algorithm: sum the squares of all integers from 0 up to (but not including) 1 000 000.
Pure Python:
In [101]:
n = 1000000
In [126]:
def sumProc01(n=1000000):
    """Sum the squares of the integers ``0 .. n-1`` with ``map`` and a lambda.

    Parameters
    ----------
    n : int, optional
        Exclusive upper bound of the range.  Defaults to 1000000, the value
        that was previously hard-coded, so ``sumProc01()`` is unchanged.

    Returns
    -------
    int
        ``0**2 + 1**2 + ... + (n-1)**2``; 0 when ``n <= 0``.
    """
    return sum(map(lambda x: x**2, xrange(n)))
In [127]:
timeit -r 10 -n 10 sumProc01()
In [31]:
print sumProc01()
Cython:
In [199]:
n = 1000000
In [180]:
%%cython
def sumProc02(n=1000000):
    """Sum the squares of ``0 .. n-1`` with ``map`` and a lambda (Cython cell).

    The body uses only untyped Python objects.

    Parameters
    ----------
    n : int, optional
        Exclusive upper bound of the range.  Defaults to 1000000, the value
        that was previously hard-coded, so ``sumProc02()`` is unchanged.

    Returns
    -------
    int
        ``0**2 + 1**2 + ... + (n-1)**2``; 0 when ``n <= 0``.
    """
    return sum(map(lambda x: x**2, xrange(n)))
In [181]:
timeit -r 10 -n 10 sumProc02()
In [106]:
def bla(n=1000000):
    """Sum the squares of ``0 .. n-1`` by building a list first.

    Each square is appended to a list and the list is summed at the end.

    Parameters
    ----------
    n : int, optional
        Exclusive upper bound of the loop.  Defaults to 1000000, the value
        that was previously hard-coded, so ``bla()`` is unchanged.

    Returns
    -------
    int
        ``0**2 + 1**2 + ... + (n-1)**2``; 0 when ``n <= 0``.
    """
    out = []
    for k in xrange(n):
        out.append(k**2)
    return sum(out)
In [113]:
timeit -r 10 -n 10 bla()
In [193]:
%%cython
def bla2():
    """Sum the squares of 0 .. 999999 via a list, with a C-typed loop index.

    The index is declared ``long long`` rather than ``int``: with a 32-bit
    C ``int`` the square overflows as soon as ``k`` exceeds 46340, so the
    appended values (and therefore the sum) were wrong.  The list holds
    Python integers, so ``sum(out)`` itself cannot overflow.
    """
    out = []
    cdef long long k
    for k in xrange(1000000):
        # Plain multiplication keeps the product in 64-bit integer
        # arithmetic regardless of how ``**`` is typed by the compiler.
        out.append(k * k)
    return sum(out)
In [194]:
timeit -r 10 -n 10 bla2()
In [24]:
def bla3(n=1000000):
    """Sum the squares of ``0 .. n-1`` with a running total (no list).

    Parameters
    ----------
    n : int, optional
        Exclusive upper bound of the loop.  Defaults to 1000000, the value
        that was previously hard-coded, so ``bla3()`` is unchanged.

    Returns
    -------
    int
        ``0**2 + 1**2 + ... + (n-1)**2``; 0 when ``n <= 0``.
    """
    out = 0
    for k in xrange(n):
        out += k**2
    return out
In [25]:
timeit -r 10 -n 10 bla3()
In [20]:
def bla5():
    """Sum the squares of 0 .. 999999 with a vectorised NumPy expression.

    The array is created as 64-bit integers explicitly.  ``arange``'s
    default integer type depends on the platform; where it is 32 bits wide
    both the squares (up to about 1e12) and the total (about 3.3e17)
    overflow silently.
    """
    return (arange(1000000, dtype='int64')**2).sum()
In [21]:
timeit -r 10 -n 10 bla5()
In [66]:
%%cython
def bla4():
    """Sum the squares of 0 .. 999999 using C-typed variables only.

    Both the accumulator and the index are ``long long``.  The exact total
    is 333332833333500000, far beyond a 32-bit ``int``, and a single square
    already exceeds 32 bits once ``k`` passes 46340; with ``int`` variables
    the function returned a wrapped-around value.
    """
    cdef long long out = 0
    cdef long long k
    for k in xrange(1000000):
        # Plain multiplication keeps the product in 64-bit integer
        # arithmetic regardless of how ``**`` is typed by the compiler.
        out += k * k
    return out
In [67]:
timeit -r 10 -n 10 bla4()
Does (x)range add any overhead? Compare with the above bla4():
In [50]:
%%cython
def blanext():
    """Sum the squares of 0 .. 999999 with a ``while`` loop and C types.

    Same computation as a typed ``for ... in xrange`` loop, written with an
    explicit counter.  The accumulator and counter are ``long long``: the
    exact total is 333332833333500000, which does not fit in a 32-bit
    ``int``, so ``int`` variables produced a wrapped-around result.
    """
    cdef long long out = 0
    cdef long long k = 0
    while k < 1000000:
        # Plain multiplication keeps the product in 64-bit integer
        # arithmetic regardless of how ``**`` is typed by the compiler.
        out += k * k
        k += 1
    return out
In [54]:
timeit -r 10 -n 10 blanext()
Sieve
In [142]:
def primes1(n):
    """Return the list of primes smaller than ``n`` (sieve of Eratosthenes).

    A boolean table marks every index as prime initially (except 0 and 1);
    for each index still marked, its multiples starting at its square are
    cleared.  The surviving indices from 2 upward are returned in order.
    """
    sieve = [False, False] + [True] * (n - 2)
    for candidate in xrange(2, n):
        if sieve[candidate]:
            multiple = candidate * candidate
            while multiple < n:
                sieve[multiple] = False
                multiple += candidate
    return [idx for idx in xrange(2, n) if sieve[idx]]
In [144]:
primes1(20)
Out[144]:
In [145]:
m = 10000
In [148]:
timeit -n 100 -r 3 primes1(m)
Naive:
In [149]:
%load_ext cythonmagic
In [150]:
%%cython
def primes2(n):
    """Return the list of primes smaller than ``n`` (untyped Cython sieve).

    A boolean table marks every index as prime initially (except 0 and 1);
    for each index still marked, its multiples starting at its square are
    cleared.  No C types are declared, so every operation works on Python
    objects.
    """
    sieve = [False, False] + [True] * (n - 2)
    for candidate in xrange(2, n):
        if sieve[candidate]:
            multiple = candidate * candidate
            while multiple < n:
                sieve[multiple] = False
                multiple += candidate
    return [idx for idx in xrange(2, n) if sieve[idx]]
In [151]:
timeit -n 100 -r 3 primes2(m)
With C types:
In [152]:
%%cython
def primes3(int n):
    """Return the list of primes smaller than ``n`` (sieve with C-typed indices).

    Same sieve as the untyped versions, with the loop counters declared as
    C integers.  The multiple counter ``k`` is ``long long`` and the square
    is computed in 64-bit arithmetic: with a 32-bit ``int``, ``i * i``
    overflows once ``i`` exceeds 46340, which for large ``n`` produced a
    negative ``k`` and therefore an invalid (or wrong) list index.
    """
    primes = [False, False] + [True] * (n-2)
    cdef int i = 2
    cdef long long k = 0
    while i < n:
        if not primes[i]:
            i += 1
            continue
        # Widen before multiplying so the square cannot wrap around.
        k = <long long>i * i
        while k < n:
            primes[k] = False
            k += i
        i += 1
    return [i for i in xrange(2,n) if primes[i]]
In [153]:
timeit -n 100 -r 3 primes3(m)
Using NumPy and Cython
In [1]:
%load_ext cythonmagic
In [7]:
def step1():
    """Return the sign of one ``rand`` draw shifted by one half.

    The result is a one-element array holding -1.0 or +1.0 (0.0 only if the
    draw is exactly 0.5).  ``sign`` and ``rand`` are resolved as globals.
    """
    draw = rand(1)
    return sign(draw - .5)
def sim1(n):
    """Simulate an ``n``-point random walk with step size ``1/n``.

    Starts from an array of ``n`` zeros and sets each element to its
    predecessor plus ``dx`` times the value returned by ``step1()``.
    Returns the filled array.
    """
    path = zeros(n)
    step_size = 1. / n
    for k in xrange(1, n):
        path[k] = path[k - 1] + step_size * step1()
    return path
In [9]:
plot(sim1(10000))
Out[9]:
In [27]:
m = 10000
In [10]:
timeit sim1(m)
Naive Cython + NumPy:
In [16]:
%%cython
import numpy as np
cdef int step2():
    # Draw one value with np.random.rand, shift it by one half and take its
    # sign; the result is converted to the declared C int return type.
    draw = np.random.rand(1)
    return np.sign(draw - .5)
def sim2(int n):
    """Simulate an ``n``-point random walk with step size ``1/n`` (naive Cython).

    Starts from ``np.zeros(n)`` and sets each element to its predecessor
    plus ``dx`` times the value returned by ``step2()``.  Only the loop
    index and the step size are C-typed; the array is an ordinary Python
    object.
    """
    path = np.zeros(n)
    cdef double step_size = 1./n
    cdef int k
    for k in xrange(1, n):
        path[k] = path[k-1] + step_size * step2()
    return path
In [17]:
timeit sim2(m)
Good Cython from the book:
In [26]:
%%cython
import numpy as np
cimport numpy as np
DTYPE = np.double
ctypedef np.double_t DTYPE_t
from libc.stdlib cimport rand, RAND_MAX
from libc.math cimport round
cdef double step3():
    # Scale one C rand() draw by RAND_MAX, round it with the C round()
    # function, and map the rounded value v to 2*v - 1.
    cdef double u = float(rand()) / RAND_MAX
    return 2 * round(u) - 1
def sim3(int n):
    """Simulate an ``n``-point random walk with step size ``1/n`` (typed Cython).

    The result array is declared as a typed one-dimensional ``ndarray`` of
    doubles.  Each element is its predecessor plus ``dx`` times the value
    returned by ``step3()``.

    The step is multiplied by ``dx``, matching ``sim1`` and ``sim2``; the
    earlier ``dx - step3()`` subtracted the raw step instead of scaling it,
    so this version simulated a different walk from the ones it is timed
    against.
    """
    cdef int i
    cdef double dx = 1./n
    cdef np.ndarray[DTYPE_t, ndim=1] x = np.zeros(n, dtype=DTYPE)
    for i in xrange(n-1):
        # Scale the step by dx, as sim1 and sim2 do.
        x[i+1] = x[i] + dx * step3()
    return x
In [28]:
timeit sim3(m)
In [40]:
%%cython
from libc.stdlib cimport rand, RAND_MAX
def bla():
    """Print one C ``rand()`` draw divided by ``RAND_MAX``.

    NOTE: this reuses the name ``bla`` that an earlier cell of this
    notebook gives to a sum-of-squares function; running this cell rebinds
    the name.
    """
    scaled_draw = float(rand()) / RAND_MAX
    print scaled_draw
In [43]:
bla()
In [ ]: