Outline:
We want to integrate the function $f(x) = x^4 - 3x$.
In [ ]:
def f(x):
y = x**4 - 3*x
return y
def integrate_f(a, b, n):
dx = (b - a) / n
dx2 = dx / 2
s = f(a) * dx2
for i in range(1, n):
s += f(a + i * dx) * dx
s += f(b) * dx2
return s
Now, let's time this:
In [ ]:
%timeit integrate_f(-100, 100, int(1e5))
Not too bad, but this can add up. Let's see if Cython can do better:
In [ ]:
%load_ext cython
In [ ]:
%%cython
def f2(x):
y = x**4 - 3*x
return y
def integrate_f2(a, b, n):
dx = (b - a) / n
dx2 = dx / 2
s = f2(a) * dx2
for i in range(1, n):
s += f2(a + i * dx) * dx
s += f2(b) * dx2
return s
In [ ]:
%timeit integrate_f2(-100, 100, int(1e5))
That's a little bit faster, which is nice since all we did was to call Cython on the exact same code. But can we do better?
In [ ]:
%%cython
def f3(double x):
y = x**4 - 3*x
return y
def integrate_f3(double a, double b, int n):
dx = (b - a) / n
dx2 = dx / 2
s = f3(a) * dx2
for i in range(1, n):
s += f3(a + i * dx) * dx
s += f3(b) * dx2
return s
In [ ]:
%timeit integrate_f3(-100, 100, int(1e5))
The final bit of "easy" Cython optimization is "declaring" the variables inside the function:
In [ ]:
%%cython
def f4(double x):
y = x**4 - 3*x
return y
def integrate_f4(double a, double b, int n):
cdef:
double dx = (b - a) / n
double dx2 = dx / 2
double s = f4(a) * dx2
int i = 0
for i in range(1, n):
s += f4(a + i * dx) * dx
s += f4(b) * dx2
return s
In [ ]:
%timeit integrate_f4(-100, 100, int(1e5))
4X speedup with so little effort is pretty nice. What else can we do?
Cython has a nice "-a" flag (for annotation) that can provide clues about why your code is slow.
In [ ]:
%%cython -a
def f4(double x):
y = x**4 - 3*x
return y
def integrate_f4(double a, double b, int n):
cdef:
double dx = (b - a) / n
double dx2 = dx / 2
double s = f4(a) * dx2
int i = 0
for i in range(1, n):
s += f4(a + i * dx) * dx
s += f4(b) * dx2
return s
That's a lot of yellow still! How do we reduce this?
In [ ]:
This is a very small subset of Python. Most scientific application deal not with single values, but with arrays of data.
In [ ]:
import numpy as np
def mean3filter(arr):
arr_out = np.empty_like(arr)
for i in range(1, arr.shape[0] - 1):
arr_out[i] = np.sum(arr[i-1 : i+1]) / 3
arr_out[0] = (arr[0] + arr[1]) / 2
arr_out[-1] = (arr[-1] + arr[-2]) / 2
return arr_out
In [ ]:
%timeit mean3filter(np.random.rand(1e5))
In [ ]:
%%cython
import cython
import numpy as np
@cython.boundscheck(False)
def mean3filter2(double[::1] arr):
cdef double[::1] arr_out = np.empty_like(arr)
cdef int i
for i in range(1, arr.shape[0]-1):
arr_out[i] = np.sum(arr[i-1 : i+1]) / 3
arr_out[0] = (arr[0] + arr[1]) / 2
arr_out[-1] = (arr[-1] + arr[-2]) / 2
return np.asarray(arr_out)
In [ ]:
%timeit mean3filter2(np.random.rand(1e5))
Rubbish! How do we fix this?
In [ ]:
Warning:: Dragons afoot.
In [ ]:
%%cython -a
import cython
from cython.parallel import prange
import numpy as np
@cython.boundscheck(False)
def mean3filter3(double[::1] arr, double[::1] out):
cdef int i, j, k = arr.shape[0]-1
with nogil:
for i in prange(1, k-1, schedule='static',
chunksize=(k-2) // 2, num_threads=2):
for j in range(i-1, i+1):
out[i] += arr[j]
out[i] /= 3
out[0] = (arr[0] + arr[1]) / 2
out[-1] = (arr[-1] + arr[-2]) / 2
return np.asarray(out)
In [ ]:
rin = np.random.rand(1e7)
rout = np.empty_like(rin)
In [ ]:
%timeit mean3filter2(rin, rout)
In [ ]:
%timeit mean3filter3(rin, rout)
In [ ]:
In [ ]:
%%cython -a
# distutils: language=c++
import cython
from libcpp.vector cimport vector
@cython.boundscheck(False)
def build_list_with_vector(double[::1] in_arr):
cdef vector[double] out
cdef int i
for i in range(in_arr.shape[0]):
out.push_back(in_arr[i])
return out
In [ ]:
build_list_with_vector(np.random.rand(10))
In [ ]:
%%cython -a
#distutils: language=c++
from cython.operator cimport dereference as deref, preincrement as inc
from libcpp.vector cimport vector
from libcpp.map cimport map as cppmap
cdef class Graph:
cdef cppmap[int, vector[int]] _adj
cpdef int has_node(self, int node):
return self._adj.find(node) != self._adj.end()
cdef void add_node(self, int new_node):
cdef vector[int] out
if not self.has_node(new_node):
self._adj[new_node] = out
def add_edge(self, int u, int v):
self.add_node(u)
self.add_node(v)
self._adj[u].push_back(v)
self._adj[v].push_back(u)
def __getitem__(self, int u):
return self._adj[u]
cdef vector[int] _degrees(self):
cdef vector[int] deg
cdef int first = 0
cdef vector[int] edges
cdef cppmap[int, vector[int]].iterator it = self._adj.begin()
while it != self._adj.end():
deg.push_back(deref(it).second.size())
it = inc(it)
return deg
def degrees(self):
return self._degrees()
In [ ]:
g0 = Graph()
In [ ]:
g0.add_edge(1, 5)
g0.add_edge(1, 6)
In [ ]:
g0[1]
In [ ]:
g0.has_node(1)
In [ ]:
g0.degrees()
In [ ]:
import networkx as nx
g = nx.barabasi_albert_graph(100000, 6)
with open('graph.txt', 'w') as fout:
for u, v in g.edges_iter():
fout.write('%i,%i\n' % (u, v))
In [ ]:
%timeit list(g.degree())
In [ ]:
myg = Graph()
def line2edges(line):
u, v = map(int, line.rstrip().split(','))
return u, v
edges = map(line2edges, open('graph.txt'))
for u, v in edges:
myg.add_edge(u, v)
In [ ]:
%timeit mydeg = myg.degrees()
Use setup.py to build your Cython files.
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy as np
setup(
cmdclass = {'build_ext': build_ext},
ext_modules = [
Extension("prange_demo", ["prange_demo.pyx"],
include_dirs=[np.get_include()],
extra_compile_args=['-fopenmp'],
extra_link_args=['-fopenmp', '-lgomp']),
]
)
In [ ]:
from mean3 import mean3filter
mean3filter(np.random.rand(10))
In [ ]: