In [1]:
%load_ext cython
%load_ext autoreload
%autoreload 2

In [2]:
from psa import *
from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
import numpy as np
import os, sys
WORKDIR = '/nfs/homes/sseyler/Repositories/python/psanalysis'
sys.path.append(WORKDIR)

In [3]:
print("Generating AdK CORE C-alpha reference coords + structure...")
cref_filename = '%s/structs/1ake_a_ca_core.pdb' % WORKDIR
oref_filename = '%s/structs/4ake_a_ca_core.pdb' % WORKDIR

c_ref = MDAnalysis.Universe(cref_filename)
o_ref = MDAnalysis.Universe(oref_filename)
u_ref = MDAnalysis.Universe(cref_filename)

c_ref_ca = c_ref.selectAtoms('name CA')
o_ref_ca = o_ref.selectAtoms('name CA')

adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
c_ref_CORE_ca = c_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
        - c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
o_ref_CORE_ca = o_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
        - o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
ref_coords = 0.5*(c_ref_CORE_ca + o_ref_CORE_ca)

u_ref.atoms.translate(-c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
o_ref.atoms.translate(-o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
u_ref.selectAtoms(adkCORE_resids).CA.set_positions(ref_coords)


Generating AdK CORE C-alpha reference coords + structure...

In [142]:
print("Building collection of simulations...")
# List of method names (same as directory names)
# method_names = ['DIMS', 'FRODA', 'MAP']
method_names = ['DIMS', 'TMD', 'FRODA', 'MAP', 'iENM', 'MENM-SP', 'MENM-SD',       \
                'MDdMD', 'GOdMD', 'Morph', 'ANMP', 'LinInt']
labels = [] # Heat map labels
simulations = [] # List of simulation topology/trajectory filename pairs
universes = [] # List of MDAnalysis Universes representing simulations

# Build list of simulations, each represented by a pair of filenames
# ([topology filename], [trajectory filename]). Generate corresponding label
# list.
for method in method_names:
    # Note: DIMS uses the PSF topology format
    topname = 'top.psf' if ('DIMS' in method or 'TMD' in method) else 'top.pdb'
    pathname = 'path.dcd'
    method_dir = '{}/methods/{}'.format(WORKDIR, method)
    if method is not 'LinInt':
        if 'TMD' in method:
            for run in xrange(1, 11): # 3 runs per method
                run_dir = '{}/{:03n}'.format(method_dir, run)
                topology = '{}/{}'.format(method_dir, topname)
                trajectory = '{}/{}'.format(run_dir, pathname)
                labels.append(method + '(' + str(run) + ')')
                simulations.append((topology, trajectory))
#         nruns = 3 if 'TMD' not in method else 9
        else:
            for run in xrange(1, nruns+1): # 3 runs per method
                run_dir = '{}/{:03n}'.format(method_dir, run)
                topology = '{}/{}'.format(method_dir, topname)
                trajectory = '{}/{}'.format(run_dir, pathname)
                labels.append(method + '(' + str(run) + ')')
                simulations.append((topology, trajectory))
    else: # only one LinInt trajectory
        topology = '{}/{}'.format(method_dir, topname)
        trajectory = '{}/{}'.format(method_dir, pathname)
        labels.append(method)
        simulations.append((topology, trajectory))


Building collection of simulations...

In [143]:
# Generate simulation list represented as Universes. Each item, sim, in
# simulations is a topology/trajectory filename pair that is unpacked into
# an argument list with the "splat" ("*") operator.
for sim in simulations:
    universes.append(Universe(*sim))

In [144]:
print("Initializing Path Similarity Analysis...")
ref_selection = "name CA and " + adkCORE_resids
psa_full = PSA(universes, reference=u_ref, ref_select=ref_selection,
                    path_select="name CA", labels=labels)


Initializing Path Similarity Analysis...

In [145]:
print("Generating Path objects from aligned trajectories...")
psa_full.generate_paths(align=True, store=True)


Fitted frame   102/102  [100.0%]
Fitted frame    92/92  [100.0%]
Fitted frame    95/95  [100.0%]
Fitted frame    97/97  [100.0%]
Fitted frame    98/98  [100.0%]
Fitted frame    98/98  [100.0%]
Fitted frame    98/98  [100.0%]
Fitted frame    98/98  [100.0%]
Fitted frame    98/98  [100.0%]
Fitted frame    91/91  [100.0%]
Fitted frame    91/91  [100.0%]
Fitted frame    91/91  [100.0%]
Fitted frame   495/495  [100.0%]
Fitted frame   142/142  [100.0%]
Fitted frame   141/141  [100.0%]
Fitted frame   142/142  [100.0%]
Fitted frame   201/201  [100.0%]
Fitted frame   201/201  [100.0%]
Fitted frame   201/201  [100.0%]
Fitted frame    33/33  [100.0%]
Fitted frame    34/34  [100.0%]
Fitted frame    36/36  [100.0%]
Fitted frame    52/52  [100.0%]
Fitted frame    51/51  [100.0%]
Fitted frame    51/51  [100.0%]
Fitted frame    49/49  [100.0%]
Fitted frame    45/45  [100.0%]
Fitted frame    43/43  [100.0%]
Fitted frame    54/54  [100.0%]
Fitted frame    64/64  [100.0%]
Fitted frame    48/48  [100.0%]
Fitted frame    81/81  [100.0%]
Fitted frame   145/145  [100.0%]
Fitted frame   226/226  [100.0%]
Fitted frame   100/100  [100.0%]
Fitted frame   100/100  [100.0%]
Fitted frame   100/100  [100.0%]
Fitted frame   107/107  [100.0%]
Fitted frame   107/107  [100.0%]
Fitted frame   107/107  [100.0%]
Fitted frame   100/100  [100.0%]
Generating Path objects from aligned trajectories...

=============================


In [146]:
# metric = 'discrete_frechet'
metric = frechet_v6b
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_ward_psa-withTMD-1-10.pdf'

In [147]:
print("Calculating distance matrix...")
psa_full.run(metric=metric)


Calculating distance matrix...

In [148]:
print("Plotting heat map-dendrogram for hierarchical clustering...")
psa_full.plot(filename=plotname, linkage=linkage);


Plotting heat map-dendrogram for hierarchical clustering...

In [149]:
df_default = psa_full.D

In [150]:
# metric = 'hausdorff'
metric = hausdorff_v6
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_ward_psa-withTMD-1-10.pdf'

In [151]:
print("Calculating distance matrix...")
psa_full.run(metric=metric)


Calculating distance matrix...

In [152]:
print("Plotting heat map-dendrogram for hierarchical clustering...")
psa_full.plot(filename=plotname, linkage=linkage);


Plotting heat map-dendrogram for hierarchical clustering...

In [153]:
dh_default = psa_full.D

=============================

Hausdorff

Baseline


In [16]:
%timeit -n3 psa_full.run(metric='hausdorff')


3 loops, best of 3: 614 ms per loop

Version 0


In [14]:
# %%cython -f -c=-Ofast -c=-march=native
# import numpy as np
# cimport numpy as np
# from libc.math cimport sqrt
# cimport cython

def sqnorm(v, axis=None):
    return (v*v).sum(axis=axis)

def hausdorff_v0(P,Q, N=-1):
    if N == -1:
        N = P.shape[1] # number of atoms from 2nd dim of P
        axis = (1,2)
    else:
        axis = 1
    d = np.array([sqnorm(p - Q, axis) for p in P])
    return ( max( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )**0.5

In [15]:
%timeit -n3 psa_full.run(metric=hausdorff_v0)


3 loops, best of 3: 605 ms per loop

In [17]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v0_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 1


In [21]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython

def sqnorm(v):
    return (v*v).sum(axis=1)

def hausdorff_v1(P, Q, N):
    d = np.array([sqnorm(p - Q) for p in P])
    return ( max( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )**0.5

In [22]:
%timeit -n3 psa_full.run(metric=hausdorff_v1)


3 loops, best of 3: 617 ms per loop

In [23]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v1_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 2


In [24]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython

cdef np.ndarray sqnorm(np.ndarray v):
    return (v*v).sum(axis=1)

def hausdorff_v2(P, Q, N):
    d = np.array([sqnorm(p - Q) for p in P])
    return ( max( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )**0.5

In [25]:
%timeit -n3 psa_full.run(metric=hausdorff_v2)


3 loops, best of 3: 607 ms per loop

In [26]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v2_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 3

sqnorm using numpy.sum()


In [88]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython

ctypedef float (*fcn_ptr)(np.ndarray, np.ndarray)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v1, np.ndarray[float, ndim=1, mode='c'] v2):
    cdef np.ndarray[float, ndim=1, mode='c'] diff = v1 - v2
    return (diff*diff).sum()

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v3(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.intp_t N):
    cdef np.intp_t i, j, lenP, lenQ    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
    cdef fcn_ptr f = &sqnorm
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = f(P[i], Q[j])
    return sqrt( fmax( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )

In [89]:
%timeit -n1 psa_full.run(metric=hausdorff_v3)


1 loops, best of 3: 3.81 s per loop

Note: This is VERY slow compared to other solutions because the element-wise subtraction is happening with NumPy and cannot be fully converted to fast C code. Using the '-a' option with the %%cython command highlights the lines in sqnorm as not-very-Cythonic.


In [32]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v3_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 3b

sqnorm using explicit loop


In [91]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython

ctypedef float (*fcn_ptr)(np.ndarray, np.ndarray)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v1, np.ndarray[float, ndim=1, mode='c'] v2):
    cdef float diff, s
    cdef np.intp_t k
    s = 0.0
    for k in xrange(v1.shape[0]):
        diff = v1[k] - v2[k]
        s += diff*diff
    return s

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v3b(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.intp_t N):
    cdef np.intp_t i, j, lenP, lenQ    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
    cdef fcn_ptr f = &sqnorm
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = f(P[i], Q[j])
    return sqrt( fmax( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )

In [92]:
%timeit -n1 psa_full.run(metric=hausdorff_v3b)


1 loops, best of 3: 877 ms per loop

In [93]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v3b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 4

Memviews, Cython RMSD, NumPy min/max + NO decorators


In [94]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython

cdef float sqnorm(float[:] p, float[:] q):
    cdef np.intp_t i
    cdef float s, temp
    s = 0.0
    for i in xrange(p.shape[0]):
        temp = p[i] - q[i]
        s += temp*temp
    return s 

def hausdorff_v4(float[:,:] P, float[:,:] Q, np.intp_t N):
    cdef np.intp_t i, j, lenP, lenQ    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef float[:,:] d = np.empty((lenP, lenQ), dtype='float32')
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = sqnorm(P[i], Q[j])
    return sqrt( fmax( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )

In [95]:
%timeit -n3 psa_full.run(metric=hausdorff_v4)


3 loops, best of 3: 788 ms per loop

In [96]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v4_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 4b

Memviews, Cython RMSD, NumPy min/max + decorators


In [97]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[:] p, float[:] q):
    cdef np.intp_t i
    cdef float s, temp
    s = 0.0
    for i in xrange(p.shape[0]):
        temp = p[i] - q[i]
        s += temp*temp
    return s 

def hausdorff_v4b(float[:,:] P, float[:,:] Q, np.intp_t N):
    cdef np.intp_t i, j, lenP, lenQ
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef float[:,:] d = np.empty((lenP, lenQ), dtype='float32')
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = sqnorm(P[i], Q[j])
    return sqrt( fmax( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )

In [98]:
%timeit -n3 psa_full.run(metric=hausdorff_v4b)


3 loops, best of 3: 365 ms per loop

In [99]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v4b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 5

Memviews, Cython RMSD, NumPy min/max + C-ordered arrays (sqnorm pointed doesn't add much speed)


In [83]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
import cython
cimport cython

ctypedef float (*fcn_ptr)(float[::1], float[::1])

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
    cdef np.intp_t k
    cdef float s, temp
    s = 0.0
    for k in xrange(p.shape[0]):
        temp = p[k] - q[k]
        s += temp*temp
    return s
        
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v5(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t i, j, lenP, lenQ    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    cdef fcn_ptr f = &sqnorm
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = f(P[i,:], Q[j,:])
    return sqrt( fmax( np.amax(np.amin(d, axis=0)),             \
                  np.amax(np.amin(d, axis=1)) ) / N  )

In [84]:
%timeit -n10 psa_full.run(metric=hausdorff_v5)


10 loops, best of 3: 146 ms per loop

In [74]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v5_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 6


In [106]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython

ctypedef float[::1] (*minfcn_ptr)(float[:,::1])
ctypedef float (*maxfcn_ptr)(float[::1])

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[::1] getmin(float[:,::1] d):
    cdef np.intp_t i, j
    cdef float[::1] d_row = np.empty(d.shape[1], dtype='float32')
    cdef float[::1] rowmin = np.empty(d.shape[0], dtype='float32')
    cdef float m = 10000000
    
    for i in xrange(d.shape[0]):
        d_row = d[i,:]
        for j in xrange(d.shape[1]):
            m = fmin(d_row[j], m)
        rowmin[i] = m
    return rowmin

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float getmax(float[::1] d):
    cdef np.intp_t i
    cdef float m = -1
    
    for i in xrange(d.shape[0]):
        m = fmax(d[i], m)
    return m

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v6(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t i, j, k, lenP, lenQ    
    lenP = P.shape[0]
    lenQ = Q.shape[0]

    assert int(P.shape[1]) == int(3*N)
    assert P.shape[1] == Q.shape[1]
    
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    cdef float s, diff = 0.0
    cdef minfcn_ptr minf = &getmin
    cdef maxfcn_ptr maxf = &getmax
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s

#     return sqrt( fmax( maxf(minf(d)), maxf(np.amin(d, axis=1)) ) / N  )
#     return sqrt( fmax( maxf(minf(d)), maxf(minf(d.T)) ) / N  )
    return sqrt( fmax( np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1)) ) / N  )

In [95]:
%timeit -n10 psa_full.run(metric=hausdorff_v6)


10 loops, best of 3: 120 ms per loop

The custom Cython functions are as fast as the numpy.amax/amin solution


In [90]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v6_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Optimized


In [ ]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_opt(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k

    assert int(P.shape[1]) == int(3*N)
    assert P.shape[1] == Q.shape[1]
    
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)

    return sqrt( fmax( np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1)) ) / N  )

In [95]:
%timeit -n10 psa_full.run(metric=hausdorff_opt)


10 loops, best of 3: 120 ms per loop

In [90]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_opt_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Frechet

Baseline


In [119]:
%timeit -n3 psa_full.run(metric='discrete_frechet')


3 loops, best of 3: 3.01 s per loop

Version 1


In [97]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython

def sqnorm(v):
    return (v*v).sum(axis=1)

def frechet_v1(P, Q, N):
    Np, Nq = len(P), len(Q)
    d = np.array([sqnorm(p - Q) for p in P])
    ca = -np.ones((Np, Nq))
    
    def c(i, j):
        if ca[i,j] != -1 : return ca[i,j]
        if i > 0:
            if j > 0: ca[i,j] = max( min(c(i-1,j),c(i,j-1),c(i-1,j-1)), d[i,j] )
            else:     ca[i,j] = max( c(i-1,0), d[i,0] )
        elif j > 0:   ca[i,j] = max( c(0,j-1), d[0,j] )
        else:         ca[i,j] = d[0,0]
        return        ca[i,j]

    return ( c(Np-1, Nq-1) / N )**0.5

In [98]:
%timeit -n3 psa_full.run(metric=frechet_v1)


3 loops, best of 3: 2.4 s per loop

In [99]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v1_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 2


In [100]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython

cdef np.ndarray sqnorm(np.ndarray v):
    return (v*v).sum(axis=1)

def frechet_v2(P, Q, N):
    Np, Nq = len(P), len(Q)
    d = np.array([sqnorm(p - Q) for p in P])
    ca = -np.ones((Np, Nq))
    
    def c(i, j):
        if ca[i,j] != -1 : return ca[i,j]
        if i > 0:
            if j > 0: ca[i,j] = max( min(c(i-1,j),c(i,j-1),c(i-1,j-1)), d[i,j] )
            else:     ca[i,j] = max( c(i-1,0), d[i,0] )
        elif j > 0:   ca[i,j] = max( c(0,j-1), d[0,j] )
        else:         ca[i,j] = d[0,0]
        return        ca[i,j]

    return ( c(Np-1, Nq-1) / N )**0.5

In [101]:
%timeit -n3 psa_full.run(metric=frechet_v2)


3 loops, best of 3: 2.43 s per loop

In [102]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v2_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 3


In [108]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython

cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v):
    return (v*v).sum()

def frechet_v3(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.uint32_t N):
    cdef np.intp_t lenP, lenQ, i, j
    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef np.ndarray[float, ndim=1, mode='c'] temp
    cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
    cdef np.ndarray[float, ndim=2, mode='c'] ca = -np.ones((lenP, lenQ), dtype='float32')
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            temp = P[i]-Q[j]
            d[i,j] = sqnorm(temp)

    def c(np.ndarray[float, ndim=2, mode='c'] d, np.ndarray[float, ndim=2, mode='c'] ca, np.intp_t i, np.intp_t j):
        if ca[i,j] != -1 : return ca[i,j]
        if i > 0:
            if j > 0: ca[i,j] = max( min(c(d,ca,i-1,j),c(d,ca,i,j-1),c(d,ca,i-1,j-1)), d[i,j] )
            else:     ca[i,j] = max( c(d,ca,i-1,0), d[i,0] )
        elif j > 0:   ca[i,j] = max( c(d,ca,0,j-1), d[0,j] )
        else:         ca[i,j] = d[0,0]
        return        ca[i,j]

    return sqrt( c(d, ca, lenP-1, lenQ-1) / N )

In [109]:
%timeit -n1 psa_full.run(metric=frechet_v3)


1 loops, best of 3: 4.59 s per loop

In [59]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v3_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 4


In [112]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython


cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v):
    return (v*v).sum()


cdef float c(np.ndarray[float, ndim=2, mode='c'] d, np.ndarray[float, ndim=2, mode='c'] ca, np.intp_t i, np.intp_t j):
    if ca[i,j] != -1 : return ca[i,j]
    if i > 0:
        if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
        else:     ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
    elif j > 0:   ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
    else:         ca[i,j] = d[0,0]
    return        ca[i,j]


def frechet_v4(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.uint32_t N):
    cdef np.intp_t lenP, lenQ, i, j
    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef np.ndarray[float, ndim=1, mode='c'] temp
    cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
    cdef np.ndarray[float, ndim=2, mode='c'] ca = -np.ones((lenP, lenQ), dtype='float32')
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            temp = P[i]-Q[j]
            d[i,j] = sqnorm(temp)

    return sqrt( c(d, ca, lenP-1, lenQ-1) / N )

In [113]:
%timeit -n1 -r2 psa_full.run(metric=frechet_v4)


1 loops, best of 2: 4.58 s per loop

In [65]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v4_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 5


In [114]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
    cdef np.intp_t i
    cdef float s, temp
    s = 0.0
    for i in xrange(p.shape[0]):
        temp = p[i] - q[i]
        s += temp*temp
    return s

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
    if ca[i,j] != -1 : return ca[i,j]
    if i > 0:
        if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
        else:     ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
    elif j > 0:   ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
    else:         ca[i,j] = d[0,0]
    return        ca[i,j]


@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v5(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP, lenQ, i, j
    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')

    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = sqnorm(P[i], Q[j])

    return sqrt( c(d, ca, lenP-1, lenQ-1) / N )

In [115]:
%timeit -n3 psa_full.run(metric=frechet_v5)


3 loops, best of 3: 215 ms per loop

In [82]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v5_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 6


In [93]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
    cdef np.intp_t i
    cdef float s, temp
    s = 0.0
    for i in xrange(p.shape[0]):
        temp = p[i] - q[i]
        s += temp*temp
    return s

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
    if ca[i,j] != -1 : return ca[i,j]
    if i > 0:
        if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
        else:     ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
    elif j > 0:   ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
    else:         ca[i,j] = d[0,0]
    return        ca[i,j]


@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v6(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP, lenQ, i, j
    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    N = P.shape[1] # Check that Q.shape[1] is the same!
    
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')

    for i in xrange(lenP):
        for j in xrange(lenQ):
            d[i,j] = sqnorm(P[i], Q[j])

    return sqrt( c(d, ca, lenP-1, lenQ-1) / N )

In [89]:
%timeit -n10 psa_full.run(metric=frechet_v6)


3 loops, best of 3: 209 ms per loop

In [82]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v6_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Version 6b

Reduced memview slices by working directly with P and Q


In [95]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

ctypedef float (*fcn_ptr)(float[:,::1], float[:,::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
    cdef np.intp_t im1, jm1
    im1 = i-1
    jm1 = j-1
    if ca[i,j] != -1 : return ca[i,j]
    if i > 0:
        if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,im1,j),c(d,ca,i,jm1)),c(d,ca,im1,jm1)), d[i,j] )
        else:     ca[i,j] = fmax( c(d,ca,im1,0), d[i,0] )
    elif j > 0:   ca[i,j] = fmax( c(d,ca,0,jm1), d[0,j] )
    else:         ca[i,j] = d[0,0]
    return        ca[i,j]


@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v6b(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP, lenQ, i, j, k
    
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')
    cdef float s, temp
    cdef fcn_ptr cf = &c
    
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                temp = P[i,k] - Q[j,k]
                s += temp*temp
            d[i,j] = s

    return sqrt( c(d, ca, lenP-1, lenQ-1) / N )

In [129]:
%timeit -n10 psa_full.run(metric=frechet_v6b)


10 loops, best of 3: 188 ms per loop

In [130]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v6b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);

Optimized


In [ ]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

ctypedef float (*cD_ptr)(float[:,::1], float[:,::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float cD(float[:,::1] d, float[:,::1] cd, np.intp_t i, np.intp_t j):
    cdef np.intp_t im1, jm1
    im1 = i-1
    jm1 = j-1
    if cd[i,j] != -1 : return cd[i,j]
    if i > 0:
        if j > 0: cd[i,j] = fmax( fmin(fmin(cD(d,cd,im1,j),cD(d,cd,i,jm1)),cD(d,cd,im1,jm1)), d[i,j] )
        else:     cd[i,j] = fmax( cD(d,cd,im1,0), d[i,0] )
    elif j > 0:   cd[i,j] = fmax( cD(d,cd,0,jm1), d[0,j] )
    else:         cd[i,j] = d[0,0]
    return        cd[i,j]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_opt(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    
    assert int(P.shape[1]) == int(3*N)
    assert P.shape[1] == Q.shape[1]
    
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)
    cdef float[:,::1] cd = -np.ones((lenP, lenQ), dtype='float32')
    cdef cD_ptr couplingDistance = &cD

    return sqrt( couplingDistance(d, cd, lenP-1, lenQ-1) / N )

In [129]:
%timeit -n10 psa_full.run(metric=frechet_opt)


10 loops, best of 3: 188 ms per loop

In [130]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_opt_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);