Cython


In [ ]:
%load_ext memory_profiler
%load_ext snakeviz
%load_ext cython
import holoviews as hv
hv.extension('bokeh','matplotlib')
from IPython.core import debugger
ist = debugger.set_trace

We load in the position and box information created in the intro notebook. If you haven't run that notebook, this line will not work! (You don't have to read the wall of text, just run the cells...)


In [ ]:
import numpy as np
pos = np.loadtxt('data/positions.dat')
box = np.loadtxt('data/box.dat')

x,y,z = pos.T
lx,ly,lz = box

print('Read {:d} positions.'.format(pos.shape[0]))
print('x min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[0],pos.max(0)[0]))
print('y min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[1],pos.max(0)[1]))
print('z min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[2],pos.max(0)[2]))

In [ ]:
%%cython -a
from math import cos,pi,sqrt

cdef double potentialEnergyFunk1(double r, double width=1.0,double height=10.0):
    cdef double U = 0.0
    if r<width:
        U = 0.5 * height * (1 + cos(pi*r/width))
    return U

def calcTotalEnergy1(double[:] x, double[:] y, double[:] z, double lx, double ly, double lz):
    assert len(x) == len(y) == len(z)
    
    cdef double lx2 = lx/2.0
    cdef double ly2 = ly/2.0
    cdef double lz2 = lz/2.0
    
    cdef double dx,dy,dz,dist
    cdef int num_pos =  x.shape[0]
    cdef int i,j
    
    cdef double U = 0
    for i in range(num_pos):
        for j in range(num_pos):
            # We only need to consider each pair once
            if i>=j:
                continue
           
            dx = abs(x[i] - x[j])
            dy = abs(y[i] - y[j])
            dz = abs(z[i] - z[j])
                
            # The next few lines take care of applying the minimum image convention
            # This is neccesary because many/most molecular simulations use periodic boundary conditions
            if dx > lx2:
                dx -= lx
            if dy > ly2:
                dy -= ly
            if dz > lz2:
                dz -= lz
                
            dist = sqrt(dx*dx + dy*dy + dz*dz)
            U += potentialEnergyFunk1(dist)
    
    return U

Runtime profiling!


In [ ]:
%%prun -D prof/cython1.prof
energy = calcTotalEnergy1(x,y,z,lx,ly,lz)

In [ ]:
with open('energy/cython1.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy1(x,y,z,lx,ly,lz)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/cython1.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

Round Two: Optimizing the Cython Code


In [ ]:
%%cython -a

from libc.math cimport fabs,pi,cos,sqrt
cimport cython

@cython.cdivision(True) 
cdef double potentialEnergyFunk2(double r, double width=1.0,double height=10.0):
    cdef double U = 0.0
    if r<width:
        U = 0.5 * height * (1 + cos(pi*r/width))
    return U

@cython.boundscheck(False) 
def calcTotalEnergy2(double[:] x, double[:] y, double[:] z, double lx, double ly, double lz):
    assert len(x) == len(y) == len(z)
    
    cdef double lx2 = lx/2.0
    cdef double ly2 = ly/2.0
    cdef double lz2 = lz/2.0
    
    cdef double dx,dy,dz,dist
    cdef int num_pos =  x.shape[0]
    cdef int i,j
    
    cdef double U = 0
    for i in range(num_pos):
        for j in range(num_pos):
            # We only need to consider each pair once
            if i>=j:
                continue
           
            dx = fabs(x[i] - x[j])
            dy = fabs(y[i] - y[j])
            dz = fabs(z[i] - z[j])
                
            # The next few lines take care of applying the minimum image convention
            # This is neccesary because many/most molecular simulations use periodic boundary conditions
            if dx > lx2:
                dx -= lx
            if dy > ly2:
                dy -= ly
            if dz > lz2:
                dz -= lz
                
            dist = sqrt(dx*dx + dy*dy + dz*dz)
            U += potentialEnergyFunk2(dist)
    
    return U

Runtime profiling!


In [ ]:
%%prun -D prof/cython2.prof
energy = calcTotalEnergy2(x,y,z,lx,ly,lz)

In [ ]:
with open('energy/cython2.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy2(x,y,z,lx,ly,lz)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/cython2.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

Round 3: Parallelized Cython


In [ ]:
%%cython

from libc.math cimport fabs,pi,cos,sqrt
cimport cython
from cython.parallel import prange

@cython.cdivision(True) 
cdef double potentialEnergyFunk3(double r, double width=1.0,double height=10.0) nogil:
    cdef double U = 0.0
    if r<width:
        U = 0.5 * height * (1 + cos(pi*r/width))
    return U

@cython.boundscheck(False) 
def calcTotalEnergy3(double[:] x, double[:] y, double[:] z, double lx, double ly, double lz):
    assert len(x) == len(y) == len(z)
    
    cdef double lx2 = lx/2.0
    cdef double ly2 = ly/2.0
    cdef double lz2 = lz/2.0
    
    cdef double dx,dy,dz,dist
    cdef int num_pos =  x.shape[0]
    cdef int i,j
    
    cdef double U = 0
    for i in prange(num_pos-1,schedule='guided',nogil=True):
        for j in range(i+1,num_pos):
           
            dx = fabs(x[i] - x[j])
            dy = fabs(y[i] - y[j])
            dz = fabs(z[i] - z[j])
                
            # Note the changes I had to make in the next 6 lines
            # compared to above. This has to do with how cython/OpenMP
            # shares data amongst threads
            if dx > lx2:
                dx = dx - lx
            if dy > ly2:
                dy = dy - ly
            if dz > lz2:
                dz = dz - lz
                
            dist = sqrt(dx*dx + dy*dy + dz*dz)
            U += potentialEnergyFunk3(dist)
    
    return U

Runtime profiling!


In [ ]:
%%prun -D prof/cython3.prof
energy = calcTotalEnergy3(x,y,z,lx,ly,lz)

In [ ]:
with open('energy/cython3.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy3(x,y,z,lx,ly,lz)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/cython3.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

In [ ]: