In [1]:
n=1000
mat=np.random.rand(n,n)

n2=100
mat2=np.random.rand(n2,n2)

n3=10
mat3=np.random.rand(n3,n3)

In [2]:
mat.shape


Out[2]:
(1000, 1000)

Python, Numpy


In [3]:
def matmul(mat1,mat2):
    out=np.zeros((mat1.shape[0],mat2.shape[1]))
    for i in xrange(mat1.shape[0]):
        for j in xrange(mat2.shape[1]):
            for k in xrange(mat1.shape[1]):
                out[i,j]+=mat1[i,k]*mat2[k,j]
    return out

def matmul_dot(mat1,mat2):
    return np.dot(mat1,mat2)

In [4]:
%timeit -r 3 -n 1 matmul(mat,mat)


1 loops, best of 3: 19min 22s per loop

In [5]:
%timeit -r 3 -n 1 matmul(mat2,mat2)


1 loops, best of 3: 1.15 s per loop

In [6]:
%timeit -r 3 -n 1 matmul(mat3,mat3)


1 loops, best of 3: 1.48 ms per loop

In [6]:


In [7]:
%timeit -r 3 -n 1 matmul_dot(mat,mat)


1 loops, best of 3: 48 ms per loop

In [8]:
%timeit -r 3 -n 1 matmul_dot(mat2,mat2)


1 loops, best of 3: 61 µs per loop

In [9]:
%timeit -r 3 -n 1 matmul_dot(mat3,mat3)


1 loops, best of 3: 1.91 µs per loop

Numba


In [10]:
import numba

matmul_nb=numba.autojit(matmul)

In [11]:
%timeit -r 3 -n 1 matmul_nb(mat,mat)


1 loops, best of 3: 3.79 s per loop

In [12]:
%timeit -r 3 -n 1 matmul_nb(mat2,mat2)


1 loops, best of 3: 2.83 ms per loop

In [13]:
%timeit -r 3 -n 1 matmul_nb(mat3,mat3)


1 loops, best of 3: 5.96 µs per loop

Cython


In [14]:
%load_ext cythonmagic

In [15]:
%%cython
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def matmul_cy(np.ndarray[double,ndim=2] mat1,np.ndarray[double,ndim=2] mat2):
    cdef np.ndarray[double,ndim=2] out=np.zeros((mat1.shape[0],mat2.shape[1]))
    cdef int i,j,k
    for i in xrange(mat1.shape[0]):
        for j in xrange(mat2.shape[1]):
            for k in xrange(mat1.shape[1]):
                out[i,j]+=mat1[i,k]*mat2[k,j]
    return out

In [16]:
%timeit -r 3 -n 1 matmul_cy(mat,mat)


1 loops, best of 3: 3.41 s per loop

In [17]:
%timeit -r 3 -n 1 matmul_cy(mat2,mat2)


1 loops, best of 3: 2.69 ms per loop

In [18]:
%timeit -r 3 -n 1 matmul_cy(mat3,mat3)


1 loops, best of 3: 5.01 µs per loop

Fortran


In [19]:
%%file matmul.f90
subroutine matmul_s(n1,n2,n3,mat1,mat2,mat3)
implicit none
integer,intent(in):: n1,n2,n3
real(kind=8),intent(in):: mat1(n1,n2),mat2(n2,n3)
real(kind=8),intent(out):: mat3(n1,n3)
integer i,j,k
do i=1,n1
do j=1,n3
    mat3(i,j)=0.
    do k=1,n2
        mat3(i,j)=mat3(i,j)+mat1(i,k)*mat2(k,j)
    enddo
enddo
enddo
end subroutine

subroutine matmul_fi(n1,n2,n3,mat1,mat2,mat3)
implicit none
integer,intent(in):: n1,n2,n3
real(kind=8),intent(in):: mat1(n1,n2),mat2(n2,n3)
real(kind=8),intent(out):: mat3(n1,n3)
mat3=matmul(mat1,mat2)
end subroutine


Overwriting matmul.f90

In [20]:
!f2py -c -m matmul_fortran matmul.f90 --f90exec=/opt/local/bin/gfortran-mp-4.9 > log.txt

In [21]:
import matmul_fortran
print matmul_fortran.__doc__


This module 'matmul_fortran' is auto-generated with f2py (version:2).
Functions:
  mat3 = matmul_s(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
  mat3 = matmul_f(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
  mat3 = matmul_fi(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
.

In [22]:
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat,mat)


1 loops, best of 3: 1.3 s per loop

In [23]:
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat2,mat2)


1 loops, best of 3: 770 µs per loop

In [24]:
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat3,mat3)


1 loops, best of 3: 5.01 µs per loop

In [25]:


In [26]:
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat,mat)


1 loops, best of 3: 609 ms per loop

In [27]:
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat2,mat2)


1 loops, best of 3: 700 µs per loop

In [28]:
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat3,mat3)


1 loops, best of 3: 4.05 µs per loop

In [28]: