In [1]:
# Square test matrix of side n, filled with uniform random values in [0, 1).
n = 500
mat = np.random.random_sample((n, n))

In [2]:
# Display the dimensions of the matrix created above as a sanity check.
mat.shape


Out[2]:
(500, 500)

Python, Numpy


In [3]:
def matmul(mat1,mat2):
    """Multiply two 2-D arrays with an explicit triple loop.

    Parameters
    ----------
    mat1 : 2-D array of shape (m, p)
    mat2 : 2-D array of shape (p, q)

    Returns
    -------
    numpy.ndarray
        Array of shape (m, q) created with ``np.zeros`` and filled with
        ``sum_k mat1[i, k] * mat2[k, j]``.

    Raises
    ------
    ValueError
        If the number of columns of ``mat1`` differs from the number of
        rows of ``mat2``.
    """
    n_rows = mat1.shape[0]
    n_inner = mat1.shape[1]
    n_cols = mat2.shape[1]
    # Without this check a mat2 with extra rows would be silently truncated
    # and a wrong product returned instead of an error.
    if mat2.shape[0] != n_inner:
        raise ValueError("matmul: inner dimensions of mat1 and mat2 do not match")
    out = np.zeros((n_rows, n_cols))
    # range (rather than xrange) keeps the function usable on Python 2 and 3.
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                out[i, j] += mat1[i, k] * mat2[k, j]
    return out

def matmul_dot(mat1, mat2):
    """Return the product of mat1 and mat2 as computed by ``np.dot``."""
    product = np.dot(mat1, mat2)
    return product

In [4]:
# Time the explicit-loop Python version: 3 repeats (-r 3) of 1 call each (-n 1).
%timeit -r 3 -n 1 matmul(mat,mat)


1 loops, best of 3: 2min 19s per loop

In [6]:
# Time the np.dot-based version with the same settings (3 repeats, 1 call each).
%timeit -r 3 -n 1 matmul_dot(mat,mat)


1 loops, best of 3: 6.17 ms per loop

Numba


In [7]:
import numba

# Wrap the explicit-loop matmul with Numba's autojit to obtain a compiled variant.
# NOTE(review): autojit appears to be deprecated in later Numba releases in
# favour of numba.jit -- confirm against the installed Numba version.
matmul_nb=numba.autojit(matmul)

In [8]:
# Time the Numba-wrapped function: 3 repeats (-r 3) of 1 call each (-n 1).
%timeit -r 3 -n 1 matmul_nb(mat,mat)


1 loops, best of 3: 329 ms per loop

Cython


In [10]:
# Load the IPython extension needed for the %%cython cell magic used below.
# NOTE(review): newer IPython/Cython setups presumably load this via
# `%load_ext Cython` instead -- confirm for the environment in use.
%load_ext cythonmagic

In [11]:
%%cython
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def matmul_cy(np.ndarray[double,ndim=2] mat1,np.ndarray[double,ndim=2] mat2):
    """Multiply two 2-D double arrays with an explicit, typed triple loop.

    mat1 has shape (m, p) and mat2 has shape (p, q); the result is a new
    (m, q) array created with np.zeros.

    Raises ValueError when the column count of mat1 differs from the row
    count of mat2.
    """
    cdef np.ndarray[double,ndim=2] out
    cdef int i,j,k
    # Bounds checking is switched off for this function, so a mat2 with fewer
    # rows than mat1 has columns would be indexed outside its buffer; reject
    # incompatible shapes before entering the loops.
    if mat1.shape[1] != mat2.shape[0]:
        raise ValueError("matmul_cy: inner dimensions of mat1 and mat2 do not match")
    out=np.zeros((mat1.shape[0],mat2.shape[1]))
    for i in xrange(mat1.shape[0]):
        for j in xrange(mat2.shape[1]):
            for k in xrange(mat1.shape[1]):
                out[i,j]+=mat1[i,k]*mat2[k,j]
    return out

In [12]:
# Time the Cython-compiled function: 3 repeats (-r 3) of 1 call each (-n 1).
%timeit -r 3 -n 1 matmul_cy(mat,mat)


1 loops, best of 3: 334 ms per loop

Fortran


In [14]:
%%file matmul.f90
! Matrix product with explicit loops: mat3 = mat1 * mat2, where mat1 is
! n1 x n2, mat2 is n2 x n3 and the result mat3 is n1 x n3.
subroutine matmul_s(n1,n2,n3,mat1,mat2,mat3)
    implicit none
    integer, intent(in) :: n1, n2, n3
    real(kind=8), intent(in) :: mat1(n1,n2)
    real(kind=8), intent(in) :: mat2(n2,n3)
    real(kind=8), intent(out) :: mat3(n1,n3)
    integer :: row, col, inner

    ! Clear the whole result first, then accumulate the products into it.
    mat3 = 0.
    do row = 1, n1
        do col = 1, n3
            do inner = 1, n2
                mat3(row,col) = mat3(row,col) + mat1(row,inner)*mat2(inner,col)
            end do
        end do
    end do
end subroutine matmul_s

! Matrix product via the Fortran intrinsic: mat3 = matmul(mat1, mat2), where
! mat1 is n1 x n2, mat2 is n2 x n3 and the result mat3 is n1 x n3.
subroutine matmul_fi(n1,n2,n3,mat1,mat2,mat3)
    implicit none
    integer, intent(in) :: n1, n2, n3
    real(kind=8), intent(in) :: mat1(n1,n2)
    real(kind=8), intent(in) :: mat2(n2,n3)
    real(kind=8), intent(out) :: mat3(n1,n3)

    mat3(:,:) = matmul(mat1, mat2)
end subroutine matmul_fi


Overwriting matmul.f90

In [15]:
# Shell out to f2py to compile matmul.f90 into the module matmul_fortran using
# the gfortran executable given by --f90exec; stdout is redirected to log.txt.
!f2py -c -m matmul_fortran matmul.f90 --f90exec=/opt/local/bin/gfortran-mp-4.9 > log.txt

In [16]:
import matmul_fortran
# Show the module docstring, which lists the wrapped routines and their
# call signatures. The parenthesised single-argument form prints the same
# text on Python 2 and is valid syntax on Python 3.
print(matmul_fortran.__doc__)


This module 'matmul_fortran' is auto-generated with f2py (version:2).
Functions:
  mat3 = matmul_s(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
  mat3 = matmul_f(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
  mat3 = matmul_fi(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1))
.

In [17]:
# Time the explicit-loop Fortran routine: 3 repeats (-r 3) of 1 call each (-n 1).
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat,mat)


1 loops, best of 3: 108 ms per loop

In [19]:
# Time the Fortran routine built on the matmul intrinsic with the same settings.
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat,mat)


1 loops, best of 3: 70.7 ms per loop

In [21]: