Numba


In [ ]:
%load_ext memory_profiler
%load_ext snakeviz
%load_ext cython
import holoviews as hv
hv.extension('bokeh','matplotlib')
from IPython.core import debugger
ist = debugger.set_trace

We load in the position and box information created in the intro notebook. If you haven't run that notebook, this line will not work! (You don't have to read the wall of text, just run the cells...)


In [ ]:
import numpy as np
pos = np.loadtxt('data/positions.dat')
box = np.loadtxt('data/box.dat')

x,y,z = map(list,pos.T)
lx,ly,lz = box

print('Read {:d} positions.'.format(pos.shape[0]))
print('x min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[0],pos.max(0)[0]))
print('y min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[1],pos.max(0)[1]))
print('z min/max: {:+4.2f}/{:+4.2f}'.format(pos.min(0)[2],pos.max(0)[2]))

For numba, we use exactly the same implementations of code as the Python case, we just decorate the functions with the JIT. First try running with only decorating the calcTotalEnergy function.


In [ ]:
from math import cos,pi,sqrt
import numba

def potentialEnergyFunk1(r,width=1.0,height=10.0):
    '''
    Calculates the (soft) potential energy between two atoms
    
    Parameters
    ----------
    r: float
        separation distance between two atoms
    height: float
        breadth of the potential i.e. where the potential goes to zero
    width: float
        strength/height of the potential
    '''
    if r<width:
        return 0.5 * height * (1 + cos(pi*r/width))
    else:
        return 0
    
from math import sqrt

@numba.jit()
def calcTotalEnergy1(x,y,z,lx,ly,lz):
    '''
    Parameters
    ----------
    x,y,z: lists of floats
        1-D lists of cartesian coordinate positions
    
    lx,ly,lz: floats
        simulation box dimensions
    '''
    
    #sanity check
    assert len(x) == len(y) == len(z)
    
    # store half box lengths for minimum image convention
    lx2 = lx/2.0
    ly2 = ly/2.0
    lz2 = lz/2.0
    
    U = 0
    
    #The next two loops simply loop over every element in the x, y, and z arrays
    for i,(x1,y1,z1) in enumerate(zip(x,y,z)):
        for j,(x2,y2,z2) in enumerate(zip(x,y,z)):
            
            # We only need to consider each pair once
            if i>=j:
                continue
           
            dx = abs(x1 - x2)
            dy = abs(y1 - y2)
            dz = abs(z1 - z2)
                
            # The next few lines take care of applying the minimum image convention
            # This is neccesary because many/most molecular simulations use periodic boundary conditions
            if dx > lx2:
                dx -= lx
            if dy > ly2:
                dy -= ly
            if dz > lz2:
                dz -= lz
                
            dist = sqrt(dx*dx + dy*dy + dz*dz)
            
            U += potentialEnergyFunk1(dist)
    
    return U

Runtime profiling!


In [ ]:
%%prun -D prof/numba1.prof
energy = calcTotalEnergy1(x,y,z,lx,ly,lz)

In [ ]:
with open('energy/numba1.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy1(x,y,z,lx,ly,lz)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/numba1.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

Round Two: Double Wrapping

Perhaps this result is disappointing. What happens if we wrap both functions?


In [ ]:
from math import cos,pi,sqrt
import numba

@numba.jit()
def potentialEnergyFunk2(r,width=1.0,height=10.0):
    '''
    Calculates the (soft) potential energy between two atoms
    
    Parameters
    ----------
    r: float
        separation distance between two atoms
    height: float
        breadth of the potential i.e. where the potential goes to zero
    width: float
        strength/height of the potential
    '''
    if r<width:
        return 0.5 * height * (1 + cos(pi*r/width))
    else:
        return 0
    
from math import sqrt

@numba.jit()
def calcTotalEnergy2(x,y,z,lx,ly,lz):
    '''
    Parameters
    ----------
    x,y,z: lists of floats
        1-D lists of cartesian coordinate positions
    
    lx,ly,lz: floats
        simulation box dimensions
    '''
    
    #sanity check
    assert len(x) == len(y) == len(z)
    
    # store half box lengths for minimum image convention
    lx2 = lx/2.0
    ly2 = ly/2.0
    lz2 = lz/2.0
    
    U = 0
    
    #The next two loops simply loop over every element in the x, y, and z arrays
    for i,(x1,y1,z1) in enumerate(zip(x,y,z)):
        for j,(x2,y2,z2) in enumerate(zip(x,y,z)):
            
            # We only need to consider each pair once
            if i>=j:
                continue
           
            dx = abs(x1 - x2)
            dy = abs(y1 - y2)
            dz = abs(z1 - z2)
                
            # The next few lines take care of applying the minimum image convention
            # This is neccesary because many/most molecular simulations use periodic boundary conditions
            if dx > lx2:
                dx -= lx
            if dy > ly2:
                dy -= ly
            if dz > lz2:
                dz -= lz
                
            dist = sqrt(dx*dx + dy*dy + dz*dz)
            
            U += potentialEnergyFunk2(dist)
    
    return U

Runtime profiling!


In [ ]:
%%prun -D prof/numba2.prof
energy = calcTotalEnergy2(x,y,z,lx,ly,lz)

In [ ]:
with open('energy/numba2.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy2(x,y,z,lx,ly,lz)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/numba2.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

Round 3: Numba + Numpy?

So, a typical workflow might be to write your code with Python + Numpy and then wrap slow functions with the numba jit. What happens if we wrap a function using Numpy?


In [ ]:
import numba
import numpy as np

@numba.jit()
def potentialEnergyFunk3(r,width=1.0,height=10.0):
    '''
    Calculates the (soft) potential energy between two atoms
    
    Parameters
    ----------
    r: ndarray (float)
        separation distances between two atoms
    height: float
        breadth of the potential i.e. where the potential goes to zero
    width: float
        strength/height of the potential
    '''
    U = np.zeros_like(r)
    mask = (r<width) #only do calculation below the cutoff width
    U[mask] = 0.5 * height * (1 + np.cos(np.pi*r[mask]/width))
    return U

@numba.jit()
def calcTotalEnergy3(pos,box):
    '''
    Parameters
    ----------
    pos: ndarray, size (N,3), (float)
        array of cartesian coordinate positions
    
    box: ndarray, size (3), (float)
        simulation box dimensions
    '''
    
    #sanity check
    assert box.shape[0] == 3
    
    # This next line is rather unpythonic but essentially it convinces
    # numpy to perform a subtraction between the full Cartesian Product
    # of the positions array
    dr = np.abs(pos - pos[:,np.newaxis,:])
    
    #extract out upper triangle
    dr = dr[np.triu_indices(dr.shape[0],k=1)]  #<<<<<<<
    
    #still need to apply periodic boundary conditions
    dr = np.where(dr>box/2.0,dr-box,dr)
        
    dist = np.sqrt(np.sum(np.square(dr),axis=-1))
    
    # calculate the full N x N distance matrix
    U = potentialEnergyFunk3(dist)
    
    return U.sum()

In [ ]:
%%prun -D prof/numba3.prof
energy = calcTotalEnergy3(pos,box)

In [ ]:
with open('energy/numba3.dat','w') as f:
    f.write('{}\n'.format(energy))

Memory profiling!


In [ ]:
memprof = %memit -o calcTotalEnergy3(pos,box)

usage = memprof.mem_usage[0]
incr = memprof.mem_usage[0] - memprof.baseline
with open('prof/numba3.memprof','w') as f:
    f.write('{}\n{}\n'.format(usage,incr))

In [ ]: