In [1]:
%load_ext cython
%load_ext autoreload
%autoreload 2

In [4]:
from psa import *
from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
import numpy as np
import os, sys

# Display images from filesystem
from IPython.display import Image       # for PNGs
from wand.image import Image as WImage  # for PDFs

WORKDIR = '/nfs/homes/sseyler/Repositories/python/psanalysis'
# WORKDIR = '/Users/sseyler/Repositories/python/psanalysis'
sys.path.append(WORKDIR)

In [5]:
print("Generating AdK CORE C-alpha reference coords + structure...")
cref_filename = '%s/structs/1ake_a_ca_core.pdb' % WORKDIR
oref_filename = '%s/structs/4ake_a_ca_core.pdb' % WORKDIR

c_ref = MDAnalysis.Universe(cref_filename)
o_ref = MDAnalysis.Universe(oref_filename)
u_ref = MDAnalysis.Universe(cref_filename)

c_ref_ca = c_ref.selectAtoms('name CA')
o_ref_ca = o_ref.selectAtoms('name CA')

adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
c_ref_CORE_ca = c_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
        - c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
o_ref_CORE_ca = o_ref_ca.selectAtoms(adkCORE_resids).coordinates() \
        - o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass()
ref_coords = 0.5*(c_ref_CORE_ca + o_ref_CORE_ca)

u_ref.atoms.translate(-c_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
o_ref.atoms.translate(-o_ref_ca.selectAtoms(adkCORE_resids).centerOfMass())
u_ref.selectAtoms(adkCORE_resids).CA.set_positions(ref_coords)


Generating AdK CORE C-alpha reference coords + structure...

In [6]:
print("Building collection of simulations...")
# List of method names (same as directory names)
# method_names = ['DIMS', 'FRODA', 'MAP']
# method_names = ['DIMS', 'FRODA', 'GOdMD', 'MDdMD', 'TMD-F', 'TMD-S',          \
#                 'ANMP', 'iENM', 'MAP', 'MENM-SD', 'MENM-SP',       \
#                 'Morph', 'LinInt']
method_names = ['Morph', 'LinInt']
labels = [] # Heat map labels
simulations = [] # List of simulation topology/trajectory filename pairs

# Build list of simulations, each represented by a pair of filenames
# ([topology filename], [trajectory filename]). Generate corresponding label
# list.
for method in method_names:
    # Note: DIMS uses the PSF topology format
    topname = 'top.psf' if ('DIMS' in method or 'TMD' in method) else 'top.pdb'
    pathname = 'path.dcd'
    method_dir = '{}/methods/{}'.format(WORKDIR, method)
    if method is not 'LinInt':
#         nruns = 3 if 'TMD' not in method else 3
        nruns = 1
#         if method == 'TMD-F':
#             for run in xrange(1, nruns+1): # 3 runs per method
#                 run_dir = '{}/{:03n}'.format(method_dir, run)
#                 topology = '{}/{}'.format(method_dir, topname)
#                 trajectory = '{}/{}'.format(run_dir, pathname)
#                 labels.append(method + '(' + str(run) + ')')
#                 simulations.append((topology, trajectory))
#         elif method == 'TMD-S':
#             for run in xrange(4, nruns+4): # 3 runs per method
#                 run_dir = '{}/{:03n}'.format(method_dir, run)
#                 topology = '{}/{}'.format(method_dir, topname)
#                 trajectory = '{}/{}'.format(run_dir, pathname)
#                 labels.append(method + '(' + str(run) + ')')
#                 simulations.append((topology, trajectory))
#         else:
        for run in xrange(1, nruns+1): # 3 runs per method
            run_dir = '{}/{:03n}'.format(method_dir, run)
            topology = '{}/{}'.format(method_dir, topname)
            trajectory = '{}/{}'.format(run_dir, pathname)
            labels.append(method + '(' + str(run) + ')')
            simulations.append((topology, trajectory))
    else: # only one LinInt trajectory
        topology = '{}/{}'.format(method_dir, topname)
        trajectory = '{}/{}'.format(method_dir, pathname)
        labels.append(method)
        simulations.append((topology, trajectory))


Building collection of simulations...

In [7]:
# Generate simulation list represented as Universes. Each item, sim, in
# simulations is a topology/trajectory filename pair that is unpacked into
# an argument list with the "splat" ("*") operator.
universes = [] # List of MDAnalysis Universes representing simulations
for sim in simulations:
    if 'tmd' in trajectory:
        universes.append(Universe(*sim), format="LAMMPS")
    else:
        universes.append(Universe(*sim))

In [8]:
print("Initializing Path Similarity Analysis...")
ref_selection = "name CA and " + adkCORE_resids
psa_full = PSA(universes, reference=u_ref, ref_select=ref_selection,
                    path_select="name CA", labels=labels)


Initializing Path Similarity Analysis...

In [9]:
print("Generating Path objects from aligned trajectories...")
psa_full.generate_paths(align=True, store=True)


/nfs/homes/sseyler/.local/lib/python2.7/site-packages/MDAnalysis/analysis/align.py:475: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  natoms, rot, weight)
Fitted frame   100/100  [100.0%]
Fitted frame   100/100  [100.0%]
Generating Path objects from aligned trajectories...

=============================


In [12]:
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import cophenet, correspond, inconsistent, maxinconsts

In [16]:
metric = frechet
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df_ward_namd-tmd.pdf'

In [17]:
psa_full.run(metric=metric)
Z, dgram = psa_full.plot(filename=plotname, linkage=linkage);

In [18]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
# Image(filename=(imgpath), width=650)     # Display PNG from filesystem
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[18]:

In [45]:
df_default = psa_full.D

In [40]:
Y = squareform(psa_full.D)
c, d = cophenet(Z, Y)
cor = correspond(Z, Y)
R = inconsistent(Z)
maxc = maxinconsts(Z, R)
print cor, c
print np.mean(maxc), np.mean(R[:,-1])
# print R


True 0.740510749535
0.584745927185 0.529643568915

In [41]:
plotname = 'df_avg_new.pdf'
Z, dgram = psa_full.plot(filename=plotname, linkage='average');
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[41]:

In [42]:
plotname = 'df_comp_new.pdf'
Z, dgram = psa_full.plot(filename=plotname, linkage='complete');
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[42]:

In [43]:
plotname = 'df_weight_new.pdf'
Z, dgram = psa_full.plot(filename=plotname, linkage='weighted');
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[43]:

In [44]:
plotname = 'df_single_new.pdf'
Z, dgram = psa_full.plot(filename=plotname, linkage='single');
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[44]:


In [46]:
# metric = 'hausdorff'
metric = hausdorff
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh_ward_namd-tmd.pdf'

In [47]:
psa_full.run(metric=metric)
psa_full.plot(filename=plotname, linkage=linkage);

In [21]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
# Image(filename=(imgpath), width=650)     # Display PNG from filesystem
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[21]:

In [48]:
dh_default = psa_full.D

In [ ]:
Z, dgram = psa_methods.plot(filename='df_ward-withtmd.svg', linkage='ward', figsize=4.5, labelsize=12,
                            distance_sort=False, count_sort=False)
Y = squareform(psa_methods.D)
c, d = cophenet(Z, Y)
cor = correspond(Z, Y)
R = inconsistent(Z)
maxc = maxinconsts(Z, R)


In [51]:
vdf = squareform(df_default)
vdh = squareform(dh_default)

In [52]:
np.max(np.abs(df_default-dh_default))


Out[52]:
0.19468304440920203

In [53]:
from __future__ import unicode_literals
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import pandas as pd
from pylab import cm, savefig

In [54]:
color = sns.color_palette()
color_set1 = sns.color_palette("Set1", 9)
color_set2 = sns.color_palette("Set2", 8)

In [55]:
datalen = len(vdf)
dframe = pd.DataFrame()
d = {'Discrete Fréchet' : vdf,
     'Hausdorff' : vdh}
dframe = dframe.append(pd.DataFrame(d))

In [67]:
plotname = 'psa_correlation.pdf'
color = sns.xkcd_rgb["denim blue"]
with sns.axes_style("darkgrid"):
    g = sns.jointplot('Discrete Fréchet', 'Hausdorff', data=dframe, kind="reg",
                      ratio=3, xlim=(0,5), ylim=(0,5), color=color, size=5);
    savefig('psadata/plots/' + plotname, dpi=600)


/nfs/homes/sseyler/.local/lib/python2.7/site-packages/matplotlib/pyplot.py:412: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_num_figures`).
  max_open_warning, RuntimeWarning)

In [68]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=100) # Display PDF from filesystem


Out[68]:

In [62]:
import matplotlib as mpl

mpl.use('agg')
mpl.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
mpl.rc('text', usetex=False)


/nfs/homes/sseyler/.local/lib/python2.7/site-packages/matplotlib/__init__.py:1155: UserWarning:  This call to matplotlib.use() has no effect
because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

  warnings.warn(_use_error_msg)

In [79]:
plotname = 'psa_correlation'
color = color_set1[1]
with sns.axes_style("darkgrid"):
    sns.set_context('talk')
    g = sns.JointGrid('Discrete Fréchet', 'Hausdorff', dframe, size=3.75,
                      ratio=3, space=0.1, xlim=(0,5), ylim=(0,5))
    g.plot_marginals(sns.distplot, kde=True, color=color)
    g.plot_joint(plt.scatter, color=color, edgecolor="white", s=8)
    g.annotate(stats.pearsonr, template="{stat} = {val:.3f} (p = {p:.3g})",
              loc=4);
    savefig('psadata/plots/' + plotname + '.pdf', dpi=600)
    savefig('psadata/plots/' + plotname + '.png', dpi=600)
    savefig('psadata/plots/' + plotname + '.svg', dpi=600)

In [80]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname + '.pdf'
WImage(filename=(imgpath), resolution=100) # Display PDF from filesystem


Out[80]:

=============================


In [23]:
print("Calculating distance matrix...")
%timeit -r3 psa_full.run(metric=frechet)
df1 = psa_full.D


Calculating distance matrix...
1 loops, best of 3: 1.26 s per loop

In [117]:
print("Calculating distance matrix...")
%timeit -r3 psa_full.run(metric=frechet2)
df2 = psa_full.D


Calculating distance matrix...
1 loops, best of 3: 1.36 s per loop

In [118]:
np.equal(df1, df2).all()


Out[118]:
True

In [24]:
print("Calculating distance matrix...")
%timeit -r3 psa_full.run(metric=hausdorff)
dh1 = psa_full.D


Calculating distance matrix...
1 loops, best of 3: 1.04 s per loop

In [18]:
print("Calculating distance matrix...")
%timeit -r3 psa_full.run(metric=hausdorff)
dh2 = psa_full.D


Calculating distance matrix...
1 loops, best of 3: 1.03 s per loop

In [123]:
np.equal(dh1, dh2).all()


Out[123]:
True

=============================

Hausdorff


In [14]:
%%cython -f -c=-Ofast -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)

    return sqrt( fmax( np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1)) ) / N  )

Frechet


In [15]:
%%cython -f -c=-O3
#-c=-funroll-loops -c=-ffast-math -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

ctypedef float (*cD_ptr)(float[:,::1], float[:,::1], np.intp_t, np.intp_t)

cdef float fmin3(float a, float b, float c):
    return fmin(fmin(a,b),c)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float cD(float[:,::1] d, float[:,::1] cd, np.intp_t i, np.intp_t j):
    cdef np.intp_t im1 = i-1
    cdef np.intp_t jm1 = j-1
    if cd[i,j] != -1 : return cd[i,j]
    if i > 0:
        if j > 0: cd[i,j] = fmax( fmin3(cD(d,cd,i,jm1),cD(d,cd,im1,jm1),cD(d,cd,im1,j)), d[i,j] )
        else:     cd[i,0] = fmax( cD(d,cd,im1,0), d[i,0] )
    elif j > 0:   cd[0,j] = fmax( cD(d,cd,0,jm1), d[0,j] )
    else:         cd[0,0] = d[0,0]
    return        cd[i,j]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def frechet(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)
    cdef float[:,::1] cd = -np.ones((lenP, lenQ), dtype='float32')
    cdef cD_ptr couplingDistance = &cD

    return sqrt( couplingDistance(d, cd, lenP-1, lenQ-1) / N )

Average Hausdorff


In [19]:
%%cython -f -c=-Ofast
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_avg(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)

    return sqrt( np.mean(np.amax(np.amin(d, axis=0))) + np.mean(np.amax(np.amin(d, axis=1)))  / (2*N)  )

In [20]:
%%cython -f -c=-Ofast
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, fmax, fmin
import cython
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def hausdorff_avg2(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)

    return sqrt( np.mean( np.append(np.amax(np.amin(d, axis=0)), np.amax(np.amin(d, axis=1))) )  / N  )

In [24]:
metric = hausdorff_avg2
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'dh-avg_ward_namd-tmd.pdf'

In [25]:
psa_full.run(metric=metric)
Z, dgram = psa_full.plot(filename=plotname, linkage=linkage);

In [26]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[26]:

In [ ]:
x = np.array([1,2,3])
y = np.array([4,5,6])

Average Frechet


In [ ]:
def c(i, j):
    if ca[i,j] != -1 : return ca[i,j]
    if i > 0:
        if j > 0: ca[i,j] = max( min(c(i-1,j),c(i,j-1),c(i-1,j-1)), d[i,j] )
        else:     ca[i,j] = max( c(i-1,0), d[i,0] )
    elif j > 0:   ca[i,j] = max( c(0,j-1), d[0,j] )
    else:         ca[i,j] = d[0,0]
    return        ca[i,j]

In [ ]:
def c(i, j):
    if cd[i,j] != -1 : return ca[i,j]
    
    if i > 0:
        if j > 0:
            if c(i-1,j) < c(i,j-1):
                if c(i-1,j) < c(i-1,j-1):
                    cd[i,j] = c(i-1,j)
                    cl[i,j] = cl[i-1,j] + 1
            elif c(i,j-1) < c(i-1,j-1):
                cd[i,j] = c(i,j-1)
                cl[i,j] = cl[i,j-1] + 1
            else:
                cd[i,j] = c(i-1,j-1)
                cl[i,j] = cl[i-1,j-1] + 1
        else:
            cd[i,0] = c(i-1,0) + d[i,0]
            cl[i,0] = cl[i-1,0] + 1
    elif j > 0:
        cd[0,j] = c(0,j-1) + d[0,j]
        cl[0,j] = cl[0,j-1] + 1
    else:
        cd[0,0] = d[0,0]
        cl[0,0] = 1

    return ca[i,j]

In [85]:
%%cython -f -c=-O3
#-c=-funroll-loops -c=-ffast-math -c=-march=native
import numpy as np
cimport numpy as np
from libc.math cimport fmin, fmax, sqrt
cimport cython

ctypedef float[::1] (*c_ptr)(float[:,::1], float[:,:,::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[::1] c(float[:,::1] d, float[:,:,::1] cd, np.intp_t i, np.intp_t j):
    cdef float[::1] cim1j, cijm1, cim1jm1 = np.empty((2,), dtype='float32')
    if cd[i,j,0] != -1 : return cd[i,j]
    if i > 0:
        if j > 0:
            cim1j = c(d,cd,i-1,j)
            cijm1 = c(d,cd,i,j-1)
            cim1jm1 = c(d,cd,i-1,j-1)
            if cim1j[0] < cijm1[0]:
                if cim1j[0] < cim1jm1[0]:
                    cd[i,j,1] = cim1j[1] + 1
                    cd[i,j,0] = (cim1j[1]*cim1j[0] + d[i,j])/cd[i,j,1]
            elif cijm1[0] < cim1jm1[0]:
                cd[i,j,1] = cijm1[1] + 1
                cd[i,j,0] = (cijm1[1]*cijm1[0] + d[i,j])/cd[i,j,1]
            else:
                cd[i,j,1] = cim1jm1[1] + 1
                cd[i,j,0] = (cim1jm1[1]*cim1jm1[0] + d[i,j])/cd[i,j,1]
        else:
            cim1j = c(d,cd,i-1,j)
            cd[i,j,0] = (i*cim1j[0] + d[i,j])/(i+1)
            cd[i,j,1] = cim1j[1] + 1
    elif j > 0:
        cijm1 = c(d,cd,i,j-1)
        cd[i,j,0] = (j*cijm1[0] + d[i,j])/(j+1)
        cd[i,j,1] = cijm1[1] + 1
    else:
        cd[i,j,0] = d[i,j]
        cd[i,j,1] = 1
    return cd[i,j]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    cdef np.intp_t i, j, k
    cdef float s, diff
    cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

@cython.boundscheck(False)
@cython.wraparound(False)
def frechet_avg(float[:,::1] P, float[:,::1] Q, np.intp_t N):
    cdef np.intp_t lenP = P.shape[0]
    cdef np.intp_t lenQ = Q.shape[0]
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    cdef float[:,::1] d = rmsdMatrix_c(P, Q)
    cdef float[:,:,::1] cd = -np.ones((lenP, lenQ, 2), dtype='float32')
    cdef c_ptr couplingDistance = &c
    
    cdcl = couplingDistance(d, cd, lenP-1, lenQ-1)
    print cdcl[0], cdcl[1]
    return sqrt( cdcl[0] / N )

In [13]:
import numpy as np

def c(d, cd, i, j):
    if cd[i,j,0] != -1 : return cd[i,j]
    if i > 0:
        if j > 0:
            cim1j = c(d,cd,i-1,j)
            cijm1 = c(d,cd,i,j-1)
            cim1jm1 = c(d,cd,i-1,j-1)
            if cim1j[0] < cijm1[0]:
                if cim1j[0] < cim1jm1[0]:
                    cd[i,j,1] = cim1j[1] + 1
                    cd[i,j,0] = (cim1j[1]*cim1j[0] + d[i,j])/cd[i,j,1]
            elif cijm1[0] < cim1jm1[0]:
                cd[i,j,1] = cijm1[1] + 1
                cd[i,j,0] = (cijm1[1]*cijm1[0] + d[i,j])/cd[i,j,1]
            else:
                cd[i,j,1] = cim1jm1[1] + 1
                cd[i,j,0] = (cim1jm1[1]*cim1jm1[0] + d[i,j])/cd[i,j,1]
        else:
            cim1j = c(d,cd,i-1,j)
            cd[i,j,0] = (i*cim1j[0] + d[i,j])/(i+1)
            cd[i,j,1] = cim1j[1] + 1
    elif j > 0:
        cijm1 = c(d,cd,i,j-1)
        cd[i,j,0] = (j*cijm1[0] + d[i,j])/(j+1)
        cd[i,j,1] = cijm1[1] + 1
    else:
        cd[i,j,0] = d[i,j]
        cd[i,j,1] = 1
    return cd[i,j]

def rmsdMatrix_c(P, Q):
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    d = np.empty((lenP, lenQ), dtype='float32')
    for i in xrange(lenP):
        for j in xrange(lenQ):
            s = 0.0
            for k in xrange(P.shape[1]):
                diff = P[i,k] - Q[j,k]
                s += diff*diff
            d[i,j] = s
    return d

def frechet_avg( P, Q, N):
    lenP = P.shape[0]
    lenQ = Q.shape[0]
    assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1])
    d = rmsdMatrix_c(P, Q)
    cd = -np.ones((lenP, lenQ, 2), dtype='float32')
    
    cdcl = c(d, cd, lenP-1, lenQ-1)
    print np.sqrt( cdcl[0] / N ), cdcl[1]
    return np.sqrt( cdcl[0] / N )

In [14]:
metric = frechet_avg
linkage = 'ward' # 'single' 'complete' 'weighted' 'average'
plotname = 'df-avg_ward_namd-tmd.pdf'

In [15]:
psa_full.run(metric=metric)
Z, dgram = psa_full.plot(filename=plotname, linkage=linkage);


0.455962756008 173.0
/nfs/homes/sseyler/.local/lib/python2.7/site-packages/IPython/kernel/__main__.py:16: RuntimeWarning: divide by zero encountered in float_scalars
/nfs/homes/sseyler/.local/lib/python2.7/site-packages/IPython/kernel/__main__.py:13: RuntimeWarning: divide by zero encountered in float_scalars
/nfs/homes/sseyler/.local/lib/python2.7/site-packages/IPython/kernel/__main__.py:19: RuntimeWarning: divide by zero encountered in float_scalars

In [26]:
imgpath = WORKDIR+ '/source/core/psadata/plots/' + plotname
WImage(filename=(imgpath), resolution=120) # Display PDF from filesystem


Out[26]:
%%cython -f -c=-O3 #-c=-funroll-loops -c=-ffast-math -c=-march=native import numpy as np cimport numpy as np from libc.math cimport fmin, fmax, sqrt cimport cython ctypedef float[::1] (*c_ptr)(float[:,::1], float[:,:,::1], np.intp_t, np.intp_t) @cython.boundscheck(False) @cython.wraparound(False) cdef float[::1] c(float[:,::1] d, float[:,:,::1] cd, np.intp_t i, np.intp_t j): cdef np.intp_t im1, jm1 im1 = i-1 jm1 = j-1 if cd[i,j,0] != -1 : return cd[i,j,:] if i > 0: if j > 0: ci1j = c(d,cd,im1,j) cij1 = c(d,cd,i,jm1) ci1j1 = c(d,cd,im1,jm1) if ci1j[0]/ci1j[1] < cij1[0]/cij1[1]: if ci1j[0]/ci1j[1] < ci1j1[0]/ci1j1[1]: cd[i,j,0] = ci1j[0] + d[i,j] cd[i,j,1] = ci1j[1] + 1 elif cij1[0]/cij1[1] < ci1j1[0]/ci1j1[1]: cd[i,j,0] = cij1[0] + d[i,j] cd[i,j,1] = cij1[1] + 1 else: cd[i,j,0] = ci1j1[0] + d[i,j] cd[i,j,1] = ci1j1[1] + 1 else: cdcl = c(d,cd,im1,0) cd[i,0,0] = cdcl[0] + d[i,0] cd[i,0,1] = cdcl[1] + 1 elif j > 0: cdcl = c(d,cd,0,jm1) cd[0,j,0] = cdcl[0] + d[0,j] cd[0,j,1] = cdcl[1] + 1 else: cd[0,0,0] = d[0,0] cd[0,0,1] = 1 return cd[i,j,:] @cython.boundscheck(False) @cython.wraparound(False) cdef float[:,::1] rmsdMatrix_c(float[:,::1] P, float[:,::1] Q): cdef np.intp_t lenP = P.shape[0] cdef np.intp_t lenQ = Q.shape[0] cdef np.intp_t i, j, k cdef float s, diff cdef float[:,::1] d = np.empty((lenP, lenQ), dtype='float32') for i in xrange(lenP): for j in xrange(lenQ): s = 0.0 for k in xrange(P.shape[1]): diff = P[i,k] - Q[j,k] s += diff*diff d[i,j] = s return d @cython.boundscheck(False) @cython.wraparound(False) def frechet_avg(float[:,::1] P, float[:,::1] Q, np.intp_t N): cdef np.intp_t lenP = P.shape[0] cdef np.intp_t lenQ = Q.shape[0] assert (int(P.shape[1]) == int(3*N) and P.shape[1] == Q.shape[1]) cdef float[:,::1] d = rmsdMatrix_c(P, Q) cdef float[:,:,::1] cd = -np.ones((lenP, lenQ, 2), dtype='float32') cdef c_ptr couplingDistance = &c cdcl = couplingDistance(d, cd, lenP-1, lenQ-1) print cdcl[0], cdcl[1] return sqrt( cdcl[0] / cdcl[1] / N )