In [1]:
%load_ext cython
%load_ext autoreload
%autoreload 2
In [2]:
from psa import *
from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
import numpy as np
import os, sys
WORKDIR = '/nfs/homes/sseyler/Repositories/python/psanalysis'
sys.path.append(WORKDIR)
In [3]:
print("Generating AdK CORE C-alpha reference coords + structure...")
cref_filename = '%s/structs/1ake_a_ca_core.pdb' % WORKDIR
oref_filename = '%s/structs/4ake_a_ca_core.pdb' % WORKDIR
c_ref = MDAnalysis.Universe(cref_filename)
o_ref = MDAnalysis.Universe(oref_filename)
u_ref = MDAnalysis.Universe(cref_filename)
c_ref_ca = c_ref.selectAtoms('name CA')
o_ref_ca = o_ref.selectAtoms('name CA')
adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
c_ref_CORE_ca = c_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
- c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
o_ref_CORE_ca = o_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
- o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
ref_coords = 0.5*(c_ref_CORE_ca + o_ref_CORE_ca)
u_ref.atoms.translate(-c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
o_ref.atoms.translate(-o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
u_ref.selectAtoms(adkCORE_resids).CA.set_positions(ref_coords)
In [142]:
print("Building collection of simulations...")
# List of method names (same as directory names)
# method_names = ['DIMS', 'FRODA', 'MAP']
method_names = ['DIMS', 'TMD', 'FRODA', 'MAP', 'iENM', 'MENM-SP', 'MENM-SD', \
'MDdMD', 'GOdMD', 'Morph', 'ANMP', 'LinInt']
labels = [] # Heat map labels
simulations = [] # List of simulation topology/trajectory filename pairs
universes = [] # List of MDAnalysis Universes representing simulations
# Build list of simulations, each represented by a pair of filenames
# ([topology filename], [trajectory filename]). Generate corresponding label
# list.
for method in method_names:
# Note: DIMS uses the PSF topology format
topname = 'top.psf' if ('DIMS' in method or 'TMD' in method) else 'top.pdb'
pathname = 'path.dcd'
method_dir = '{}/methods/{}'.format(WORKDIR, method)
if method is not 'LinInt':
if 'TMD' in method:
for run in xrange(1, 11): # 3 runs per method
run_dir = '{}/{:03n}'.format(method_dir, run)
topology = '{}/{}'.format(method_dir, topname)
trajectory = '{}/{}'.format(run_dir, pathname)
labels.append(method + '(' + str(run) + ')')
simulations.append((topology, trajectory))
# nruns = 3 if 'TMD' not in method else 9
else:
for run in xrange(1, nruns+1): # 3 runs per method
run_dir = '{}/{:03n}'.format(method_dir, run)
topology = '{}/{}'.format(method_dir, topname)
trajectory = '{}/{}'.format(run_dir, pathname)
labels.append(method + '(' + str(run) + ')')
simulations.append((topology, trajectory))
else: # only one LinInt trajectory
topology = '{}/{}'.format(method_dir, topname)
trajectory = '{}/{}'.format(method_dir, pathname)
labels.append(method)
simulations.append((topology, trajectory))
In [143]:
# Generate simulation list represented as Universes. Each item, sim, in
# simulations is a topology/trajectory filename pair that is unpacked into
# an argument list with the "splat" ("*") operator.
for sim in simulations:
universes.append(Universe(*sim))
In [144]:
print("Initializing Path Similarity Analysis...")
ref_selection = "name CA and " + adkCORE_resids
psa_full = PSA(universes, reference=u_ref, ref_select=ref_selection,
path_select="name CA", labels=labels)
In [145]:
print("Generating Path objects from aligned trajectories...")
psa_full.generate_paths(align=True, store=True)
In [146]:
# metric = 'discrete_frechet'
metric = frechet_v6b
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_ward_psa-withTMD-1-10.pdf'
In [147]:
print("Calculating distance matrix...")
psa_full.run(metric=metric)
In [148]:
print("Plotting heat map-dendrogram for hierarchical clustering...")
psa_full.plot(filename=plotname, linkage=linkage);
In [149]:
df_default = psa_full.D
In [150]:
# metric = 'hausdorff'
metric = hausdorff_v6
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_ward_psa-withTMD-1-10.pdf'
In [151]:
print("Calculating distance matrix...")
psa_full.run(metric=metric)
In [152]:
print("Plotting heat map-dendrogram for hierarchical clustering...")
psa_full.plot(filename=plotname, linkage=linkage);
In [153]:
dh_default = psa_full.D
In [16]:
%timeit -n3 psa_full.run(metric='hausdorff')
In [14]:
# %%cython -f -c=-Ofast -c=-march=native
# import numpy as np
# cimport numpy as np
# from libc.math cimport sqrt
# cimport cython
def sqnorm(v, axis=None):
return (v*v).sum(axis=axis)
def hausdorff_v0(P,Q, N=-1):
if N == -1:
N = P.shape[1] # number of atoms from 2nd dim of P
axis = (1,2)
else:
axis = 1
d = np.array([sqnorm(p - Q, axis) for p in P])
return ( max( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )**0.5
In [15]:
%timeit -n3 psa_full.run(metric=hausdorff_v0)
In [17]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v0_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [21]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython
def sqnorm(v):
return (v*v).sum(axis=1)
def hausdorff_v1(P, Q, N):
d = np.array([sqnorm(p - Q) for p in P])
return ( max( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )**0.5
In [22]:
%timeit -n3 psa_full.run(metric=hausdorff_v1)
In [23]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v1_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [24]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython
cdef np.ndarray sqnorm(np.ndarray v):
return (v*v).sum(axis=1)
def hausdorff_v2(P, Q, N):
d = np.array([sqnorm(p - Q) for p in P])
return ( max( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )**0.5
In [25]:
%timeit -n3 psa_full.run(metric=hausdorff_v2)
In [26]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v2_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [88]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython
ctypedef float (*fcn_ptr)(np.ndarray, np.ndarray)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v1, np.ndarray[float, ndim=1, mode='c'] v2):
cdef np.ndarray[float, ndim=1, mode='c'] diff = v1 - v2
return (diff*diff).sum()
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v3(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.intp_t N):
cdef np.intp_t i, j, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
cdef fcn_ptr f = &sqnorm
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = f(P[i], Q[j])
return sqrt( fmax( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )
In [89]:
%timeit -n1 psa_full.run(metric=hausdorff_v3)
Note: This is VERY slow compared to other solutions because the element-wise subtraction is happening with NumPy and cannot be fully converted to fast C code. Using the '-a' option with the %%cython command highlights the lines in sqnorm as not-very-Cythonic.
In [32]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v3_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [91]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython
ctypedef float (*fcn_ptr)(np.ndarray, np.ndarray)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v1, np.ndarray[float, ndim=1, mode='c'] v2):
cdef float diff, s
cdef np.intp_t k
s = 0.0
for k in xrange(v1.shape[0]):
diff = v1[k] - v2[k]
s += diff*diff
return s
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v3b(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.intp_t N):
cdef np.intp_t i, j, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
cdef fcn_ptr f = &sqnorm
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = f(P[i], Q[j])
return sqrt( fmax( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )
In [92]:
%timeit -n1 psa_full.run(metric=hausdorff_v3b)
In [93]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v3b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [94]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython
cdef float sqnorm(float[:] p, float[:] q):
cdef np.intp_t i
cdef float s, temp
s = 0.0
for i in xrange(p.shape[0]):
temp = p[i] - q[i]
s += temp*temp
return s
def hausdorff_v4(float[:,:] P, float[:,:] Q, np.intp_t N):
cdef np.intp_t i, j, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef float[:,:] d = np.empty((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = sqnorm(P[i], Q[j])
return sqrt( fmax( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )
In [95]:
%timeit -n3 psa_full.run(metric=hausdorff_v4)
In [96]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v4_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [97]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[:] p, float[:] q):
cdef np.intp_t i
cdef float s, temp
s = 0.0
for i in xrange(p.shape[0]):
temp = p[i] - q[i]
s += temp*temp
return s
def hausdorff_v4b(float[:,:] P, float[:,:] Q, np.intp_t N):
cdef np.intp_t i, j, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef float[:,:] d = np.empty((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = sqnorm(P[i], Q[j])
return sqrt( fmax( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )
In [98]:
%timeit -n3 psa_full.run(metric=hausdorff_v4b)
In [99]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v4b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [83]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax
import cython
cimport cython
ctypedef float (*fcn_ptr)(float[::1], float[::1])
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
cdef np.intp_t k
cdef float s, temp
s = 0.0
for k in xrange(p.shape[0]):
temp = p[k] - q[k]
s += temp*temp
return s
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v5(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t i, j, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
cdef fcn_ptr f = &sqnorm
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = f(P[i,:], Q[j,:])
return sqrt( fmax( np.amax(np.amin(d, axis=0)), \
np.amax(np.amin(d, axis=1)) ) / N )
In [84]:
%timeit -n10 psa_full.run(metric=hausdorff_v5)
In [74]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v5_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [106]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython
ctypedef float[::1] (*minfcn_ptr)(float[:,::1])
ctypedef float (*maxfcn_ptr)(float[::1])
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[::1] getmin(float[:,::1] d):
cdef np.intp_t i, j
cdef float[::1] d_row = np.empty(d.shape[1], dtype='float32')
cdef float[::1] rowmin = np.empty(d.shape[0], dtype='float32')
cdef float m = 10000000
for i in xrange(d.shape[0]):
d_row = d[i,:]
for j in xrange(d.shape[1]):
m = fmin(d_row[j], m)
rowmin[i] = m
return rowmin
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float getmax(float[::1] d):
cdef np.intp_t i
cdef float m = -1
for i in xrange(d.shape[0]):
m = fmax(d[i], m)
return m
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_v6(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t i, j, k, lenP, lenQ
lenP = P.shape[0]
lenQ = Q.shape[0]
assert int(P.shape[1]) == int(3*N)
assert P.shape[1] == Q.shape[1]
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
cdef float s, diff = 0.0
cdef minfcn_ptr minf = &getmin
cdef maxfcn_ptr maxf = &getmax
for i in xrange(lenP):
for j in xrange(lenQ):
s = 0.0
for k in xrange(P.shape[1]):
diff = P[i,k] - Q[j,k]
s += diff*diff
d[i,j] = s
# return sqrt( fmax( maxf(minf(d)), maxf(np.amin(d, axis=1)) ) / N )
# return sqrt( fmax( maxf(minf(d)), maxf(minf(d.T)) ) / N )
return sqrt( fmax( np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1)) ) / N )
In [95]:
%timeit -n10 psa_full.run(metric=hausdorff_v6)
The custom Cython functions are as fast as the numpy.amax/amin solution
In [90]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_v6_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [ ]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
cdef np.intp_t lenP = P.shape[0]
cdef np.intp_t lenQ = Q.shape[0]
cdef np.intp_t i, j, k
cdef float s, diff
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
s = 0.0
for k in xrange(P.shape[1]):
diff = P[i,k] - Q[j,k]
s += diff*diff
d[i,j] = s
return d
@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_opt(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t lenP = P.shape[0]
cdef np.intp_t lenQ = Q.shape[0]
cdef np.intp_t i, j, k
assert int(P.shape[1]) == int(3*N)
assert P.shape[1] == Q.shape[1]
cdef float[:,::1] d = rmsdMatrix_c(P, Q)
return sqrt( fmax( np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1)) ) / N )
In [95]:
%timeit -n10 psa_full.run(metric=hausdorff_opt)
In [90]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_opt_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [119]:
%timeit -n3 psa_full.run(metric='discrete_frechet')
In [97]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython
def sqnorm(v):
return (v*v).sum(axis=1)
def frechet_v1(P, Q, N):
Np, Nq = len(P), len(Q)
d = np.array([sqnorm(p - Q) for p in P])
ca = -np.ones((Np, Nq))
def c(i, j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = max( min(c(i-1,j),c(i,j-1),c(i-1,j-1)), d[i,j] )
else: ca[i,j] = max( c(i-1,0), d[i,0] )
elif j > 0: ca[i,j] = max( c(0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
return ( c(Np-1, Nq-1) / N )**0.5
In [98]:
%timeit -n3 psa_full.run(metric=frechet_v1)
In [99]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v1_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [100]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
# from libc.math cimport sqrt
cimport cython
cdef np.ndarray sqnorm(np.ndarray v):
return (v*v).sum(axis=1)
def frechet_v2(P, Q, N):
Np, Nq = len(P), len(Q)
d = np.array([sqnorm(p - Q) for p in P])
ca = -np.ones((Np, Nq))
def c(i, j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = max( min(c(i-1,j),c(i,j-1),c(i-1,j-1)), d[i,j] )
else: ca[i,j] = max( c(i-1,0), d[i,0] )
elif j > 0: ca[i,j] = max( c(0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
return ( c(Np-1, Nq-1) / N )**0.5
In [101]:
%timeit -n3 psa_full.run(metric=frechet_v2)
In [102]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v2_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [108]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v):
return (v*v).sum()
def frechet_v3(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.uint32_t N):
cdef np.intp_t lenP, lenQ, i, j
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef np.ndarray[float, ndim=1, mode='c'] temp
cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
cdef np.ndarray[float, ndim=2, mode='c'] ca = -np.ones((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
temp = P[i]-Q[j]
d[i,j] = sqnorm(temp)
def c(np.ndarray[float, ndim=2, mode='c'] d, np.ndarray[float, ndim=2, mode='c'] ca, np.intp_t i, np.intp_t j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = max( min(c(d,ca,i-1,j),c(d,ca,i,j-1),c(d,ca,i-1,j-1)), d[i,j] )
else: ca[i,j] = max( c(d,ca,i-1,0), d[i,0] )
elif j > 0: ca[i,j] = max( c(d,ca,0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
return sqrt( c(d, ca, lenP-1, lenQ-1) / N )
In [109]:
%timeit -n1 psa_full.run(metric=frechet_v3)
In [59]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v3_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [112]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython
cdef float sqnorm(np.ndarray[float, ndim=1, mode='c'] v):
return (v*v).sum()
cdef float c(np.ndarray[float, ndim=2, mode='c'] d, np.ndarray[float, ndim=2, mode='c'] ca, np.intp_t i, np.intp_t j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
else: ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
elif j > 0: ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
def frechet_v4(np.ndarray[float, ndim=2, mode='c'] P, np.ndarray[float, ndim=2, mode='c'] Q, np.uint32_t N):
cdef np.intp_t lenP, lenQ, i, j
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef np.ndarray[float, ndim=1, mode='c'] temp
cdef np.ndarray[float, ndim=2, mode='c'] d = np.empty((lenP, lenQ), dtype='float32')
cdef np.ndarray[float, ndim=2, mode='c'] ca = -np.ones((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
temp = P[i]-Q[j]
d[i,j] = sqnorm(temp)
return sqrt( c(d, ca, lenP-1, lenQ-1) / N )
In [113]:
%timeit -n1 -r2 psa_full.run(metric=frechet_v4)
In [65]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v4_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [114]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
cdef np.intp_t i
cdef float s, temp
s = 0.0
for i in xrange(p.shape[0]):
temp = p[i] - q[i]
s += temp*temp
return s
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
else: ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
elif j > 0: ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v5(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t lenP, lenQ, i, j
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = sqnorm(P[i], Q[j])
return sqrt( c(d, ca, lenP-1, lenQ-1) / N )
In [115]:
%timeit -n3 psa_full.run(metric=frechet_v5)
In [82]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v5_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [93]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float sqnorm(float[::1] p, float[::1] q):
cdef np.intp_t i
cdef float s, temp
s = 0.0
for i in xrange(p.shape[0]):
temp = p[i] - q[i]
s += temp*temp
return s
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,i-1,j),c(d,ca,i,j-1)),c(d,ca,i-1,j-1)), d[i,j] )
else: ca[i,j] = fmax( c(d,ca,i-1,0), d[i,0] )
elif j > 0: ca[i,j] = fmax( c(d,ca,0,j-1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v6(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t lenP, lenQ, i, j
lenP = P.shape[0]
lenQ = Q.shape[0]
N = P.shape[1] # Check that Q.shape[1] is the same!
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
d[i,j] = sqnorm(P[i], Q[j])
return sqrt( c(d, ca, lenP-1, lenQ-1) / N )
In [89]:
%timeit -n10 psa_full.run(metric=frechet_v6)
In [82]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v6_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [95]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython
ctypedef float (*fcn_ptr)(float[:,::1], float[:,::1], np.intp_t, np.intp_t)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float c(float[:,::1] d, float[:,::1] ca, np.intp_t i, np.intp_t j):
cdef np.intp_t im1, jm1
im1 = i-1
jm1 = j-1
if ca[i,j] != -1 : return ca[i,j]
if i > 0:
if j > 0: ca[i,j] = fmax( fmin(fmin(c(d,ca,im1,j),c(d,ca,i,jm1)),c(d,ca,im1,jm1)), d[i,j] )
else: ca[i,j] = fmax( c(d,ca,im1,0), d[i,0] )
elif j > 0: ca[i,j] = fmax( c(d,ca,0,jm1), d[0,j] )
else: ca[i,j] = d[0,0]
return ca[i,j]
@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_v6b(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t lenP, lenQ, i, j, k
lenP = P.shape[0]
lenQ = Q.shape[0]
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
cdef float[:,::1] ca = -np.ones((lenP, lenQ), dtype='float32')
cdef float s, temp
cdef fcn_ptr cf = &c
for i in xrange(lenP):
for j in xrange(lenQ):
s = 0.0
for k in xrange(P.shape[1]):
temp = P[i,k] - Q[j,k]
s += temp*temp
d[i,j] = s
return sqrt( c(d, ca, lenP-1, lenQ-1) / N )
In [129]:
%timeit -n10 psa_full.run(metric=frechet_v6b)
In [130]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_v6b_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);
In [ ]:
%%cython -f -c=-O3 -c=-march=native -c=-funroll-loops -c=-ffast-math
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython
ctypedef float (*cD_ptr)(float[:,::1], float[:,::1], np.intp_t, np.intp_t)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float cD(float[:,::1] d, float[:,::1] cd, np.intp_t i, np.intp_t j):
cdef np.intp_t im1, jm1
im1 = i-1
jm1 = j-1
if cd[i,j] != -1 : return cd[i,j]
if i > 0:
if j > 0: cd[i,j] = fmax( fmin(fmin(cD(d,cd,im1,j),cD(d,cd,i,jm1)),cD(d,cd,im1,jm1)), d[i,j] )
else: cd[i,j] = fmax( cD(d,cd,im1,0), d[i,0] )
elif j > 0: cd[i,j] = fmax( cD(d,cd,0,jm1), d[0,j] )
else: cd[i,j] = d[0,0]
return cd[i,j]
@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
cdef np.intp_t lenP = P.shape[0]
cdef np.intp_t lenQ = Q.shape[0]
cdef np.intp_t i, j, k
cdef float s, diff
cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
for i in xrange(lenP):
for j in xrange(lenQ):
s = 0.0
for k in xrange(P.shape[1]):
diff = P[i,k] - Q[j,k]
s += diff*diff
d[i,j] = s
return d
@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_opt(float[:,::1] P, float[:,::1] Q, np.intp_t N):
cdef np.intp_t lenP = P.shape[0]
cdef np.intp_t lenQ = Q.shape[0]
assert int(P.shape[1]) == int(3*N)
assert P.shape[1] == Q.shape[1]
cdef float[:,::1] d = rmsdMatrix_c(P, Q)
cdef float[:,::1] cd = -np.ones((lenP, lenQ), dtype='float32')
cdef cD_ptr couplingDistance = &cD
return sqrt( couplingDistance(d, cd, lenP-1, lenQ-1) / N )
In [129]:
%timeit -n10 psa_full.run(metric=frechet_opt)
In [130]:
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_opt_ward_psa-full.pdf'
psa_full.plot(filename=plotname, linkage=linkage);