Optimizing with Cython Introduction

Make a large set of random integers


In [ ]:
from array import array
from random import randint
a = array('i', (randint(-1000, 1000) for _ in range(10000000)))

Simple way to sum with a for loop


In [ ]:
def sum_py(a):
    s = 0
    for x in a: s += x
    return s

Time the baseline simple sum methods


In [ ]:
%timeit sum_py(a)
%timeit sum(a)

Compile with Cython


In [ ]:
# needs to be run once every time the kernel is started
%load_ext cython

In [ ]:
%%cython
def sum_cy(a):
    s = 0
    for x in a: s += x
    return s

In [ ]:
%timeit sum(a)
%timeit sum_cy(a)

Static Typing


In [ ]:
%%cython
def sum_cy_st(a):
    cdef int x, s = 0
    for x in a: s += x
    return s

In [ ]:
%timeit sum(a)
%timeit sum_cy_st(a)

Colorized Annotations


In [ ]:
%%cython --annotate
def sum_cy_st(a):
    cdef int x, s = 0
    for x in a:
        s += x
    return s

Typed Memoryviews


In [ ]:
%%cython --annotate
from cpython cimport array
def sum_cy_tm(int[:] a):
    cdef int x, s = 0
    for x in a:
        s += x
    return s

In [ ]:
%timeit sum(a)
%timeit sum_cy_tm(a)

With C-like looping


In [12]:
%%cython --annotate
from cpython cimport array
def sum_cy_tm(int[:] a):
    cdef int i, n = a.shape[0], s = 0
    for i in range(n):
        s += a[i]
    return s


Out[12]:
Cython: _cython_magic_4112c0815341e20decb50029f359e2af.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+1: from cpython cimport array
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+2: def sum_cy_tm(int[:] a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_4112c0815341e20decb50029f359e2af_1sum_cy_tm(PyObject *__pyx_self, PyObject *__pyx_arg_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_4112c0815341e20decb50029f359e2af_1sum_cy_tm = {"sum_cy_tm", (PyCFunction)__pyx_pw_46_cython_magic_4112c0815341e20decb50029f359e2af_1sum_cy_tm, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_4112c0815341e20decb50029f359e2af_1sum_cy_tm(PyObject *__pyx_self, PyObject *__pyx_arg_a) {
  __Pyx_memviewslice __pyx_v_a = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_tm (wrapper)", 0);
  assert(__pyx_arg_a); {
    __pyx_v_a = __Pyx_PyObject_to_MemoryviewSlice_ds_int(__pyx_arg_a); if (unlikely(!__pyx_v_a.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_4112c0815341e20decb50029f359e2af.sum_cy_tm", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_4112c0815341e20decb50029f359e2af_sum_cy_tm(__pyx_self, __pyx_v_a);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_4112c0815341e20decb50029f359e2af_sum_cy_tm(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_a) {
  int __pyx_v_i;
  int __pyx_v_n;
  int __pyx_v_s;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_tm", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_AddTraceback("_cython_magic_4112c0815341e20decb50029f359e2af.sum_cy_tm", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_a, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__14 = PyTuple_Pack(5, __pyx_n_s_a, __pyx_n_s_a, __pyx_n_s_i, __pyx_n_s_n, __pyx_n_s_s); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple__14);
  __Pyx_GIVEREF(__pyx_tuple__14);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_4112c0815341e20decb50029f359e2af_1sum_cy_tm, NULL, __pyx_n_s_cython_magic_4112c0815341e20dec); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_sum_cy_tm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_C_Python27_settings_ipython_cyth, __pyx_n_s_sum_cy_tm, 2, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+3:     cdef int i, n = a.shape[0], s = 0
  __pyx_v_n = (__pyx_v_a.shape[0]);
  __pyx_v_s = 0;
+4:     for i in range(n):
  __pyx_t_1 = __pyx_v_n;
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+5:         s += a[i]
    __pyx_t_3 = __pyx_v_i;
    __pyx_t_4 = -1;
    if (__pyx_t_3 < 0) {
      __pyx_t_3 += __pyx_v_a.shape[0];
      if (unlikely(__pyx_t_3 < 0)) __pyx_t_4 = 0;
    } else if (unlikely(__pyx_t_3 >= __pyx_v_a.shape[0])) __pyx_t_4 = 0;
    if (unlikely(__pyx_t_4 != -1)) {
      __Pyx_RaiseBufferIndexError(__pyx_t_4);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
    __pyx_v_s = (__pyx_v_s + (*((int *) ( /* dim=0 */ (__pyx_v_a.data + __pyx_t_3 * __pyx_v_a.strides[0]) ))));
  }
+6:     return s
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_s); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_r = __pyx_t_5;
  __pyx_t_5 = 0;
  goto __pyx_L0;

In [13]:
%timeit sum(a)
%timeit sum_cy_tm(a)


10 loops, best of 3: 101 ms per loop
100 loops, best of 3: 9.8 ms per loop

Compile Optimizations


In [14]:
%%cython --annotate
cimport cython
from cpython cimport array

@cython.boundscheck(False)
@cython.wraparound(False)
def sum_cy_co(int[:] a):
    cdef int i, n = a.shape[0], s = 0
    for i in range(n):
        s += a[i]
    return s


Out[14]:
Cython: _cython_magic_1c2c27d0bd776d20f060ff183e5c1c18.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: cimport cython
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 02: from cpython cimport array
 03: 
 04: @cython.boundscheck(False)
 05: @cython.wraparound(False)
+06: def sum_cy_co(int[:] a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_1sum_cy_co(PyObject *__pyx_self, PyObject *__pyx_arg_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_1sum_cy_co = {"sum_cy_co", (PyCFunction)__pyx_pw_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_1sum_cy_co, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_1sum_cy_co(PyObject *__pyx_self, PyObject *__pyx_arg_a) {
  __Pyx_memviewslice __pyx_v_a = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_co (wrapper)", 0);
  assert(__pyx_arg_a); {
    __pyx_v_a = __Pyx_PyObject_to_MemoryviewSlice_ds_int(__pyx_arg_a); if (unlikely(!__pyx_v_a.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18.sum_cy_co", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_sum_cy_co(__pyx_self, __pyx_v_a);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_sum_cy_co(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_a) {
  int __pyx_v_i;
  int __pyx_v_n;
  int __pyx_v_s;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_co", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18.sum_cy_co", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_a, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__14 = PyTuple_Pack(5, __pyx_n_s_a, __pyx_n_s_a, __pyx_n_s_i, __pyx_n_s_n, __pyx_n_s_s); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple__14);
  __Pyx_GIVEREF(__pyx_tuple__14);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_1c2c27d0bd776d20f060ff183e5c1c18_1sum_cy_co, NULL, __pyx_n_s_cython_magic_1c2c27d0bd776d20f0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_sum_cy_co, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_C_Python27_settings_ipython_cyth, __pyx_n_s_sum_cy_co, 6, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+07:     cdef int i, n = a.shape[0], s = 0
  __pyx_v_n = (__pyx_v_a.shape[0]);
  __pyx_v_s = 0;
+08:     for i in range(n):
  __pyx_t_1 = __pyx_v_n;
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+09:         s += a[i]
    __pyx_t_3 = __pyx_v_i;
    __pyx_v_s = (__pyx_v_s + (*((int *) ( /* dim=0 */ (__pyx_v_a.data + __pyx_t_3 * __pyx_v_a.strides[0]) ))));
  }
+10:     return s
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_s); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

In [15]:
%timeit sum(a)
%timeit sum_cy_co(a)


10 loops, best of 3: 102 ms per loop
100 loops, best of 3: 3.57 ms per loop

Memoryview Layout


In [16]:
%%cython --annotate
cimport cython
from cpython cimport array

@cython.boundscheck(False)
@cython.wraparound(False)
def sum_cy_cc(int[::1] a):
    cdef int i, n = a.shape[0], s = 0
    for i in range(n):
        s += a[i]
    return s


Out[16]:
Cython: _cython_magic_62346fc64126443b161f7a626d2f29c4.pyx

Generated by Cython 0.23.4

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: cimport cython
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 02: from cpython cimport array
 03: 
 04: @cython.boundscheck(False)
 05: @cython.wraparound(False)
+06: def sum_cy_cc(int[::1] a):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_62346fc64126443b161f7a626d2f29c4_1sum_cy_cc(PyObject *__pyx_self, PyObject *__pyx_arg_a); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_62346fc64126443b161f7a626d2f29c4_1sum_cy_cc = {"sum_cy_cc", (PyCFunction)__pyx_pw_46_cython_magic_62346fc64126443b161f7a626d2f29c4_1sum_cy_cc, METH_O, 0};
static PyObject *__pyx_pw_46_cython_magic_62346fc64126443b161f7a626d2f29c4_1sum_cy_cc(PyObject *__pyx_self, PyObject *__pyx_arg_a) {
  __Pyx_memviewslice __pyx_v_a = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_cc (wrapper)", 0);
  assert(__pyx_arg_a); {
    __pyx_v_a = __Pyx_PyObject_to_MemoryviewSlice_dc_int(__pyx_arg_a); if (unlikely(!__pyx_v_a.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_62346fc64126443b161f7a626d2f29c4.sum_cy_cc", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_62346fc64126443b161f7a626d2f29c4_sum_cy_cc(__pyx_self, __pyx_v_a);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_62346fc64126443b161f7a626d2f29c4_sum_cy_cc(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_a) {
  int __pyx_v_i;
  int __pyx_v_n;
  int __pyx_v_s;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sum_cy_cc", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("_cython_magic_62346fc64126443b161f7a626d2f29c4.sum_cy_cc", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_a, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__14 = PyTuple_Pack(5, __pyx_n_s_a, __pyx_n_s_a, __pyx_n_s_i, __pyx_n_s_n, __pyx_n_s_s); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple__14);
  __Pyx_GIVEREF(__pyx_tuple__14);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_62346fc64126443b161f7a626d2f29c4_1sum_cy_cc, NULL, __pyx_n_s_cython_magic_62346fc64126443b16); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_sum_cy_cc, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_C_Python27_settings_ipython_cyth, __pyx_n_s_sum_cy_cc, 6, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+07:     cdef int i, n = a.shape[0], s = 0
  __pyx_v_n = (__pyx_v_a.shape[0]);
  __pyx_v_s = 0;
+08:     for i in range(n):
  __pyx_t_1 = __pyx_v_n;
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+09:         s += a[i]
    __pyx_t_3 = __pyx_v_i;
    __pyx_v_s = (__pyx_v_s + (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_a.data) + __pyx_t_3)) ))));
  }
+10:     return s
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_s); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

In [17]:
%timeit sum(a)
%timeit sum_cy_cc(a)


10 loops, best of 3: 101 ms per loop
100 loops, best of 3: 3.55 ms per loop

Multithreading


In [18]:
%%cython
cimport cython
from cpython cimport array

@cython.boundscheck(False)
@cython.wraparound(False)
def __sum_gil(int[::1] a, int[:] out):
    cdef int i, n = a.shape[0], s = 0
    for i in range(n): s += a[i]
    out[0] = s
    
def sum_par(int[::1] a):
    cdef int[::1] A1 = a[0 : a.shape[0]//2]
    cdef int[::1] A2 = a[a.shape[0]//2 : a.shape[0]]
    cdef int[:] out = array.array('i', (0,0))
    from threading import Thread
    T1 = Thread(target=__sum_gil, args=(A1, out[0:1]))
    T2 = Thread(target=__sum_gil, args=(A2, out[1:2]))
    T1.start(); T2.start()
    T1.join(); T2.join()
    return out[0] + out[1]

In [19]:
%timeit sum(a)
%timeit sum_par(a)


10 loops, best of 3: 101 ms per loop
100 loops, best of 3: 4.44 ms per loop

With release of the GIL


In [20]:
%%cython
cimport cython
from cpython cimport array

@cython.boundscheck(False)
@cython.wraparound(False)
def __sum_gil(int[::1] a, int[:] out):
    cdef int i, n = a.shape[0], s = 0
    with nogil:
        for i in range(n): s += a[i]
    out[0] = s
    
def sum_par(int[::1] a):
    cdef int[::1] A1 = a[0 : a.shape[0]//2]
    cdef int[::1] A2 = a[a.shape[0]//2 : a.shape[0]]
    cdef int[:] out = array.array('i', (0,0))
    from threading import Thread
    T1 = Thread(target=__sum_gil, args=(A1, out[0:1]))
    T2 = Thread(target=__sum_gil, args=(A2, out[1:2]))
    T1.start(); T2.start()
    T1.join(); T2.join()
    return out[0] + out[1]

In [21]:
%timeit sum(a)
%timeit sum_par(a)


10 loops, best of 3: 101 ms per loop
100 loops, best of 3: 2.47 ms per loop

Interfacing with C Code

Need sum.h with int sum_c(int* a, int n); and sum.c with the following:

#include "sum.h"
int sum_c(int* a, int n) {
    int i, s = 0;
    for (i = 0; i < n; ++i) { s += a[i]; }
    return s;
}

Also, the following needs to be updated where -I is the path to where the above is saved.


In [22]:
%%cython -I C:/Python27/notebooks
#distutils: sources=sum.c

cimport cython
from cpython cimport array

cdef extern from "sum.h" nogil:
    cdef int sum_c(int* a, int n)
    
@cython.boundscheck(False)
@cython.wraparound(False)
def sum_cy_c(int[::1] a):
    cdef int out
    with nogil: out = sum_c(&a[0], a.shape[0])
    return out

In [23]:
%timeit sum_cy_cc(a)
%timeit sum_cy_c(a)


100 loops, best of 3: 3.54 ms per loop
100 loops, best of 3: 3.56 ms per loop

Numpy Equivilent


In [24]:
import numpy as np
#np_a = np.random.randint(-1000, 1000, size=10000000)
np_a = np.array(a)
%timeit sum(np_a)
%timeit np_a.sum()
%timeit sum_cy_cc(a)


1 loops, best of 3: 859 ms per loop
100 loops, best of 3: 4.34 ms per loop
100 loops, best of 3: 3.53 ms per loop

Check they all sum properly


In [25]:
(sum_py(a), sum_cy(a), sum(a), sum_cy_st(a), sum_cy_tm(a), sum_cy_co(a),
 sum_cy_cc(a), sum_par(a), sum_cy_c(a), np_a.sum())


Out[25]:
(2140693,
 2140693,
 2140693,
 2140693,
 2140693,
 2140693,
 2140693,
 2140693,
 2140693,
 2140693)

In [ ]: