Code was executed in Python 3.4.0

Numba vs. Cython vs. regular (C)Python & Numpy

Here, we implement a linear regression via least squares fitting (with vertical offsets) by solving to fit n points $(x_i, y_i)$ with $i=1,2,...n,$ via linear equation of the form
$f(x) = a\cdot x + b$.

I have described the approach in more detail in this IPython notebook.

In the more "classic" approach that is often found in statistics textbooks, we calculate the following parameters as follows:

$a = \frac{S_{x,y}}{\sigma_{x}^{2}}\quad$ (slope)

$b = \bar{y} - a\bar{x}\quad$ (y-axis intercept)

where

$S_{xy} = \sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})\quad$ (covariance)

$\sigma{_x}^{2} = \sum_{i=1}^{n} (x_i - \bar{x})^2\quad$ (variance)

We will implement this "classic" approach in

Python/CPython: py_lstsqr()
Numba: numba_lstsqrs()
Cython: cy_lstsqrs()



In [1]:

    
import numpy as np
import scipy.stats
from numba import jit



In [2]:

    
%load_ext cythonmagic



In [3]:

    
def py_lstsqr(x, y):
    """ Computes the least-squares solution to a linear matrix equation. """
    len_x = len(x)
    x_avg = sum(x)/len_x
    y_avg = sum(y)/len(y)
    var_x = 0
    cov_xy = 0
    for i in range(len_x):
        temp = (x[i] - x_avg)
        var_x += temp**2
        cov_xy += temp*(y[i] - y_avg)
    slope = cov_xy / var_x
    y_interc = y_avg - slope*x_avg
    return (slope, y_interc)



In [4]:

    
@jit
def numba_lstsqr(x, y):
    """ Computes the least-squares solution to a linear matrix equation. """
    len_x = len(x)
    x_avg = sum(x)/len_x
    y_avg = sum(y)/len(y)
    var_x = 0
    cov_xy = 0
    for i in range(len_x):
        temp = (x[i] - x_avg)
        var_x += temp**2
        cov_xy += temp*(y[i] - y_avg)
    slope = cov_xy / var_x
    y_interc = y_avg - slope*x_avg
    return (slope, y_interc)



In [5]:

    
%%cython
def cy_lstsqr(x, y):
    """ Computes the least-squares solution to a linear matrix equation. """
    cdef double x_avg, y_avg, var_x, cov_xy,\
         slope, y_interc, x_i, y_i
    cdef int len_x
    len_x = len(x)
    x_avg = sum(x)/len_x
    y_avg = sum(y)/len(y)
    var_x = 0
    cov_xy = 0
    for i in range(len_x):
        temp = (x[i] - x_avg)
        var_x += temp**2
        cov_xy += temp*(y[i] - y_avg)
    slope = cov_xy / var_x
    y_interc = y_avg - slope*x_avg
    return (slope, y_interc)

Checking the least square fit



In [6]:

    
%matplotlib inline



In [ ]:



In [30]:

    
from matplotlib import pyplot as plt
import scipy.stats

slope, intercept = py_lstsqr(x, y)

line_x = [round(min(x)) - 1, round(max(x)) + 1]
line_y = [slope*x_i + intercept for x_i in line_x]

plt.figure(figsize=(7,6))
plt.scatter(x,y)
plt.plot(line_x, line_y, color='red', lw='2')

plt.ylabel('y')
plt.xlabel('x')
plt.title('Linear regression via least squares fit')

ftext = 'y = ax + b = {:.3f} + {:.3f}x'\
        .format(slope, intercept)
plt.figtext(.15,.8, ftext, fontsize=11, ha='left')

assert(
scipy.stats.linregress(x, y)[0:2] == py_lstsqr(x, y))

plt.show()

Benchmarking (with Python lists):



In [8]:

    
import matplotlib.pyplot as plt

def plot_benchmarks(times_n):

    labels = [ ('py_lstsqr', '"classic" least squares in reg. (C)Python'),
          ('numba_lstsqr', '"classic" least squares in Numba'),
          ('cy_lstsqr', '"classic" least squares in Cython')]


    plt.rcParams.update({'font.size': 12})

    fig = plt.figure(figsize=(10,8))
    for lb in labels:
        plt.plot(orders_n, times_n[lb[0]], alpha=0.5, label=lb[1], marker='o', lw=3)
    plt.xlabel('sample size n')
    plt.ylabel('time per computation in milliseconds [ms]')
    plt.xlim([1,max(orders_n) + max(orders_n) * 10])
    plt.legend(loc=2)
    plt.grid()
    plt.xscale('log')
    plt.yscale('log')

    max_perf = max( py/cy for py,cy in zip(times_n['py_lstsqr'],
                                   times_n['cy_lstsqr']) )
    min_perf = min( py/cy for py,cy in zip(times_n['py_lstsqr'],
                                   times_n['cy_lstsqr']) )

    ftext = 'Using Cython is {:.2f}x to '\
        '{:.2f}x faster than regular (C)Python'\
        .format(min_perf, max_perf)

    plt.figtext(.14,.15, ftext, fontsize=11, ha='left')
    plt.title('Performance of least square fit implementations')
    plt.show()



In [6]:

    
import timeit
import random
random.seed(12345)

funcs = ['py_lstsqr', 'numba_lstsqr', 'cy_lstsqr']

orders_n = [10**n for n in range(1, 8)]
times_lists = {f:[] for f in funcs}

for n in orders_n:
    x = [x_i*random.randint(8,12)/10 for x_i in range(n)]
    y = [y_i*random.randint(10,14)/10 for y_i in range(n)]
    for f in funcs:
        times_lists[f].append(min(timeit.Timer('%s(x,y)' %f, 
                      'from __main__ import %s, x, y' %f)
                              .repeat(repeat=3, number=100)))



In [9]:

    
plot_benchmarks(times_lists)

Benchmarking (with numpy arrays):



In [41]:

    
import timeit
import numpy as np
random.seed(12345)

funcs = ['py_lstsqr', 'numba_lstsqr', 'cy_lstsqr']

orders_n = [10**n for n in range(1, 5)]
times_np_arys = {f:[] for f in funcs}

for n in orders_n:
    x = np.asarray([x_i*np.random.randint(8,12)/10 for x_i in range(n)])
    y = np.asarray([y_i*np.random.randint(10,14)/10 for y_i in range(n)])
    for f in funcs:
        times_np_arys[f].append(min(timeit.Timer('%s(x,y)' %f, 
                      'from __main__ import %s, x, y' %f)
                              .repeat(repeat=3, number=100)))



In [42]:

    
plot_benchmarks(times_np_arys)