In this notebook we will look at how we can customize the CCodePrinter to generate C code which calls functions provided by a 3rd party library. For this example we will use fastapprox which is an open source header only library containing fast approximate versions of transcendental functions. Will will look into using fastexp instead of exp.
In [ ]:
import sympy as sym
sym.init_printing()
In [ ]:
args = x, y, z = sym.symbols('x y z')
expr = x + sym.exp(2*y + x) + sym.exp(3*z)
expr
say that we need to evaluate this expression for vectors of values of x, y & z:
In [ ]:
import numpy as np
N = 4096
inp = np.linspace(10, 20, N), np.linspace(-20, -30, N), np.linspace(-9, -3, N)
In [ ]:
f = sym.lambdify(args, expr, modules=['numpy'])
f(*inp)
In [ ]:
%timeit f(*inp)
In [ ]:
from sympy.utilities.autowrap import ufuncify
In [ ]:
uf = ufuncify(args, expr)
In [ ]:
uf(*inp)
In [ ]:
%timeit uf(*inp)
The fact that lambdify is about as fast as our ufuncify tells us that we are bound by the time to evaluate expensive transcendental exp. For those of you who know Cython you can see that in a Cython version as well:
In [ ]:
%load_ext cython
In [ ]:
from scipy2017codegen.templates import render_pyxbld
render_pyxbld('cy_f_mod', include_dirs=[np.get_include()])
In [ ]:
%%cython_pyximport cy_f_mod
from libc.math cimport exp
cimport numpy as cnp
import numpy as np
import cython
def cy_f(
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] x,
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] y,
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] z,
):
cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] out = np.empty(x.size)
cdef double * _x = &x[0]
cdef double * _y = &y[0]
cdef double * _z = &z[0]
cdef double * _out = &out[0]
cdef int i
if x.size != y.size or x.size != z.size:
raise ValueError("Inconsistent length")
for i in range(x.size):
_out[i] = _x[i] + exp(2*_y[i] + _x[i]) + exp(3*_z[i])
return out
In [ ]:
cy_f(*inp)
In [ ]:
%timeit cy_f(*inp)
So let's try to use fastexp from fastapprox:
In [ ]:
import os
import scipy2017codegen
fastapprox_dir = os.path.join(os.path.dirname(scipy2017codegen.__file__), 'fastapprox')
print(''.join(open(os.path.join(fastapprox_dir, 'fastexp.h')).readlines()[62:67]))
In [ ]:
sym.ccode(expr, user_functions={'exp': 'fastexp'})
In [ ]:
render_pyxbld('cy_f_fastexp_mod', include_dirs=[np.get_include()])
In [ ]:
%%cython_pyximport cy_f_fastexp_mod
from libc.math cimport exp
cimport numpy as cnp
import numpy as np
import cython
cdef extern from "fastapprox/fastexp.h":
float fastexp(float)
def cy_f_fastexp(
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] x,
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] y,
cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] z,
):
cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] out = np.empty(x.size)
cdef double * _x = &x[0]
cdef double * _y = &y[0]
cdef double * _z = &z[0]
cdef double * _out = &out[0]
cdef int i
if x.size != y.size or x.size != z.size:
raise ValueError("Inconsistent length")
for i in range(x.size):
_out[i] = _x[i] + fastexp(2*_y[i] + _x[i]) + fastexp(3*_z[i])
return out
In [ ]:
%timeit cy_f_fastexp
So that's a 10x speedup, of course at the cost of lost precision, but we are already assuming that was acceptable:
In [ ]:
cy_f_fastexp(*inp) - f(*inp)