Enhancing Performance

Cython


In [1]:
import pandas as pd
import numpy as np
# Benchmark fixture: 1000 rows with two random float columns (A, B), a random
# integer column N drawn from [100, 1000), and a constant string column x.
df = pd.DataFrame(dict(
    A=np.random.randn(1000),
    B=np.random.randn(1000),
    N=np.random.randint(100, 1000, size=1000),
    x='x',
))
df.head(10)


Out[1]:
A B N x
0 -0.278532 0.279866 220 x
1 -1.295786 -0.045760 867 x
2 -1.741329 1.013186 800 x
3 -0.399503 0.016128 629 x
4 -0.522196 1.634603 230 x
5 0.624148 1.803405 814 x
6 0.041982 1.527557 190 x
7 -1.269780 -0.081592 175 x
8 -0.229136 0.275056 378 x
9 0.540911 -0.032524 779 x

In [2]:
# Pure-Python baseline functions
def f(x):
    """Return x * (x - 1)."""
    shifted = x - 1
    return x * shifted
def intergate_f(x, y, N):
    """Approximate the integral of f over [x, y] with a left Riemann sum of N equal steps."""
    step = (y - x) / N
    total = 0
    # Accumulate with an explicit loop so the summation order stays fixed.
    for k in range(N):
        total += f(x + k * step)
    return total * step
# Baseline timing: row-wise apply that calls the pure-Python intergate_f.
%timeit df.apply(lambda x: intergate_f(x['A'], x['B'], x['N']), axis=1)


10 loops, best of 3: 158 ms per loop

In [3]:
# 加载 IPython Cython 语法扩展
%load_ext cythonmagic

In [4]:
%%cython
# Plain Python functions compiled with Cython (no type declarations)
def f_plain(x):
    """Return x * (x - 1)."""
    shifted = x - 1
    return x * shifted
def intergate_f_plain(x, y, N):
    """Approximate the integral of f_plain over [x, y] with a left Riemann sum of N equal steps."""
    step = (y - x) / N
    total = 0
    # Accumulate with an explicit loop so the summation order stays fixed.
    for k in range(N):
        total += f_plain(x + k * step)
    return total * step

In [5]:
%timeit df.apply(lambda x: intergate_f_plain(x['A'], x['B'], x['N']), axis=1)
# About 1/8 faster after compiling with Cython


10 loops, best of 3: 137 ms per loop

Adding type declarations


In [6]:
%%cython
# Declare C data types in Cython
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) on C doubles. `except? -2` makes -2 the error
    # sentinel: a pending exception is only checked for when -2 is returned.
    cdef double shifted = x - 1
    return x * shifted
cpdef double intergate_f_typed(double x, double y, int N):
    """Approximate the integral of f_typed over [x, y] with a left Riemann sum of N equal steps."""
    cdef int k
    cdef double step = (y - x) / N
    cdef double total = 0
    # k is a C int, so Cython can compile this range() loop to a plain C loop.
    for k in range(N):
        total += f_typed(x + k * step)
    return total * step

In [7]:
%timeit df.apply(lambda x: intergate_f_typed(x['A'], x['B'], x['N']), axis=1)
# Large speedup (about 5x) once C types are declared in Cython


10 loops, best of 3: 24.6 ms per loop

In [8]:
%%cython
# Implement the pandas.apply operation itself in Cython
cimport numpy as np
import numpy as np
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) on C doubles. `except? -2` makes -2 the error
    # sentinel: a pending exception is only checked for when -2 is returned.
    cdef double shifted = x - 1
    return x * shifted
cpdef double intergate_f_typed(double x, double y, int N):
    """Approximate the integral of f_typed over [x, y] with a left Riemann sum of N equal steps."""
    cdef int k
    cdef double step = (y - x) / N
    cdef double total = 0
    # k is a C int, so Cython can compile this range() loop to a plain C loop.
    for k in range(N):
        total += f_typed(x + k * step)
    return total * step
cpdef np.ndarray[double] apply_intergate_f(np.ndarray[double] col_a, np.ndarray[double] col_b, np.ndarray col_n):
    """Call intergate_f_typed once per row and return the results as a new array of doubles.

    For row r the call is intergate_f_typed(col_a[r], col_b[r], col_n[r]); the number of
    rows processed is len(col_n).
    """
    cdef Py_ssize_t row
    cdef Py_ssize_t n_rows = len(col_n)
    cdef np.ndarray[double] out = np.empty(n_rows)
    for row in range(n_rows):
        out[row] = intergate_f_typed(col_a[row], col_b[row], col_n[row])
    return out

In [9]:
%timeit apply_intergate_f(df['A'].values, df['B'].values, df['N'].values)
# About 60x faster once the pandas.apply loop itself is implemented in Cython


100 loops, best of 3: 2.15 ms per loop

In [10]:
%%cython
# More advanced Cython techniques
cimport cython
cimport numpy as np
import numpy as np
cdef double f_typed(double x) except? -2:
    # Integrand x * (x - 1) on C doubles. `except? -2` makes -2 the error
    # sentinel: a pending exception is only checked for when -2 is returned.
    cdef double shifted = x - 1
    return x * shifted
cpdef double intergate_f_typed(double x, double y, int N):
    """Approximate the integral of f_typed over [x, y] with a left Riemann sum of N equal steps."""
    cdef int k
    cdef double step = (y - x) / N
    cdef double total = 0
    # k is a C int, so Cython can compile this range() loop to a plain C loop.
    for k in range(N):
        total += f_typed(x + k * step)
    return total * step
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray[double] apply_intergate_f_wrap(np.ndarray[double] col_a, np.ndarray[double] col_b, np.ndarray[Py_ssize_t] col_n):
    """Call intergate_f_typed once per row and return the results as a new array of doubles.

    For row i the call is intergate_f_typed(col_a[i], col_b[i], col_n[i]).

    Raises:
        ValueError: if col_a, col_b and col_n do not all have the same length.
    """
    cdef Py_ssize_t i, n = len(col_n)
    cdef np.ndarray[double] res = np.empty(n)
    # Bounds checking is disabled for this function, so a col_a or col_b shorter than
    # col_n would be read past its end instead of raising IndexError. Validate once up front.
    if len(col_a) != n or len(col_b) != n:
        raise ValueError("col_a, col_b and col_n must have the same length")
    for i in range(n):
        res[i] = intergate_f_typed(col_a[i], col_b[i], col_n[i])
    return res

In [11]:
%timeit apply_intergate_f_wrap(df['A'].values, df['B'].values, df['N'].values)
# No significant further performance gain


100 loops, best of 3: 2.1 ms per loop

In [11]: