In [5]:
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
%matplotlib inline
import mdtraj
plt.rc('font', family='serif')
In [6]:
# we want to find the atoms with the fastest fluctuations when optimally aligned, and then
# deemphasize them in a weighted RMSD Mahalanobis distance
# the result will be a vector of length N where N is the number of atoms in the molecule
# which will be written on the diagonal of an NxN matrix and multiplied into RMSD or Binet-Cauchy computations
In [7]:
from mdtraj.geometry import alignment
In [8]:
npr.seed(0)
X = npr.randn(100,3)
Y = X.dot(np.diag([1,2,1]))
In [9]:
def compute_atomwise_deviation_xyz(X_xyz, Y_xyz):
    """Per-atom Euclidean deviation after optimally aligning X onto Y.

    Parameters
    ----------
    X_xyz, Y_xyz : np.ndarray, shape (n_atoms, 3)
        Cartesian coordinates of two conformations.

    Returns
    -------
    np.ndarray, shape (n_atoms,)
        Distance each atom of aligned-X lies from the matching atom of Y.
    """
    aligned = alignment.transform(X_xyz, Y_xyz)
    displacement = aligned - Y_xyz
    return np.sqrt((displacement ** 2).sum(axis=1))

def compute_atomwise_deviation(X, Y):
    """Per-atom deviation between the first frames of two mdtraj trajectories."""
    return compute_atomwise_deviation_xyz(X.xyz[0], Y.xyz[0])
In [10]:
from msmbuilder.example_datasets import FsPeptide
dataset = FsPeptide().get()
fs_trajectories = dataset.trajectories
fs_t = fs_trajectories[0]
In [16]:
dataset.DESCR
Out[16]:
In [ ]:
# from simtk.u  -- incomplete import left mid-typing (likely simtk.unit); complete or delete this cell
In [11]:
compute_atomwise_deviation(fs_t[0],fs_t[1])
Out[11]:
In [17]:
# Collect per-atom deviations between frames separated by a fixed lag `tau`
# into an (n_frames, n_atoms) array; row i compares frame i with frame i+tau.
tau=20
n_frames=len(fs_t)-tau
n_atoms = fs_t.n_atoms
atomwise_deviations=np.zeros((n_frames,n_atoms))
for i in range(n_frames):
    atomwise_deviations[i] = compute_atomwise_deviation(fs_t[i],fs_t[i+tau])
In [13]:
atomwise_deviations.shape
Out[13]:
In [15]:
# fs  -- abandoned cell fragment (NameError on a fresh run); remove or complete
In [27]:
mean = np.mean(atomwise_deviations,0)
stdev = np.std(atomwise_deviations,0)
plt.plot(mean,c='darkblue')
#plt.plot(mean+stdev,c='blue',linestyle='--')
#plt.plot(mean-stdev,c='blue',linestyle='--')
plt.fill_between(range(len(mean)),mean-stdev,mean+stdev,color='blue',alpha=0.3)
plt.xlabel('Atom index')
plt.ylabel(r'Mean displacement, computed at $\tau=1$ns')
plt.title('Atomwise deviation from Fs peptide simulation')
plt.xlim(0,263)
plt.savefig('atomwise_deviation.pdf')
In [31]:
plt.plot((1/mean) / np.max(1/mean),c='darkblue')
plt
Out[31]:
In [38]:
from msmbuilder.cluster import MiniBatchKMedoids
kmed = MiniBatchKMedoids(metric='rmsd')
kmed.fit(fs_trajectories)
Out[38]:
In [39]:
clustered = kmed.transform(fs_trajectories)
In [41]:
from msmbuilder.msm import MarkovStateModel
msm = MarkovStateModel()
msm.fit(clustered)
Out[41]:
In [42]:
msm.score_
Out[42]:
In [44]:
from msmbuilder.featurizer import DihedralFeaturizer
dih_model = DihedralFeaturizer()
dih_traj = dih_model.fit_transform(fs_trajectories)
In [48]:
kmed = MiniBatchKMedoids()
clustered = kmed.fit_transform(dih_traj)
msm = MarkovStateModel()
msm.fit(clustered)
msm.score_
Out[48]:
In [61]:
from msmbuilder.decomposition import tICA
tica = tICA(lag_time=10,weighted_transform=True)
tica_traj = tica.fit_transform(dih_traj)
kmed = MiniBatchKMedoids()
clustered = kmed.fit_transform(tica_traj)
msm = MarkovStateModel()
msm.fit(clustered)
msm.score_
Out[61]:
In [63]:
trajectory = fs_trajectories[0]
In [102]:
def wrmsd_dmat(trajectory, weights):
    """All-pairs weighted deviation matrix over the frames of a trajectory.

    Parameters
    ----------
    trajectory : mdtraj.Trajectory
        Frames to compare. WARNING: mutated in place — `superpose` re-aligns
        the whole trajectory on every iteration of the loop below.
    weights : np.ndarray, shape (n_atoms,)
        Per-atom weights dotted into the atomwise squared deviations.

    Returns
    -------
    np.ndarray, shape (n_frames, n_frames)
        Symmetric matrix of weighted mean squared deviations.

    NOTE(review): despite the name, no square root is taken, so entries are
    weighted *mean squared* deviations rather than RMSDs — confirm this is
    intended before comparing against mdtraj.rmsd output.
    """
    dmat = np.zeros((len(trajectory),len(trajectory)))
    N = trajectory.n_atoms
    for i in range(1,len(dmat)):
        # align everything to the current "pivot" element
        trajectory.superpose(trajectory,i)
        # compute atomwise squared deviations
        # (only frames 0..i-1 vs frame i: fills the strict lower triangle)
        atomwise_deviations = ((trajectory[:i].xyz - trajectory[i].xyz)**2).sum(-1)
        # dot with a weight vector and divide by the number of atoms
        weighted_rmsd = atomwise_deviations.dot(weights) / N
        # assign to the lower-left half of the distance matrix
        dmat[i,:i] = weighted_rmsd
    # return a full distance matrix (diagonal is zero, so no double-counting)
    return dmat + dmat.T
In [114]:
trajectory = fs_trajectories[0][::20]
len(trajectory)
Out[114]:
In [115]:
dmat = wrmsd_dmat(trajectory,np.ones(trajectory.n_atoms))
In [125]:
%timeit dmat = wrmsd_dmat(trajectory,np.ones(trajectory.n_atoms))
In [ ]:
In [116]:
dmat.shape
Out[116]:
In [117]:
plt.imshow(dmat,interpolation='none',cmap='Blues')
Out[117]:
In [118]:
dmat_1 = wrmsd_dmat(trajectory,mean)
plt.imshow(dmat_1,interpolation='none',cmap='Blues')
Out[118]:
In [119]:
dmat_2 = wrmsd_dmat(trajectory,1/mean)
plt.imshow(dmat_2,interpolation='none',cmap='Blues')
Out[119]:
In [121]:
from sklearn.manifold import MDS
In [124]:
# unweighted
mds = MDS(dissimilarity='precomputed')
X = mds.fit_transform(dmat)
plt.scatter(X[:,0],X[:,1],linewidths=0,c=range(len(X)))
# weighted (dumbly)
plt.figure()
mds = MDS(dissimilarity='precomputed')
X = mds.fit_transform(dmat_1)
plt.scatter(X[:,0],X[:,1],linewidths=0,c=range(len(X)))
# weighted (cleverly)
plt.figure()
mds = MDS(dissimilarity='precomputed')
X = mds.fit_transform(dmat_2)
plt.scatter(X[:,0],X[:,1],linewidths=0,c=range(len(X)))
Out[124]:
In [90]:
deviations = (trajectory[:1000].xyz - trajectory[1].xyz).sum(-1)
deviations.shape,deviations.sum(1).shape
In [91]:
plt.plot(deviations);
Out[91]:
In [83]:
weights = np.ones(trajectory.n_atoms)
weights.shape
Out[83]:
In [85]:
deviations.dot(weights).shape
Out[85]:
In [ ]:
In [ ]:
class AltMiniBatchKMedoids(MiniBatchKMedoids):
    """MiniBatchKMedoids variant that also accepts a weighted-RMSD metric.

    metric : str or tuple
        Any metric accepted by MiniBatchKMedoids, or a tuple
        ('wrmsd', weights) where `weights` is an np.ndarray of per-atom
        weights consumed by wrmsd_dmat.

    Implemented by replacing the libdistance.pdist call inside fit();
    a better long-term approach would be to augment libdistance to accept
    callables instead of just metric-name strings.
    """
    def fit(self, X, y=None):
        """Cluster X with mini-batch k-medoids.

        Parameters
        ----------
        X : trajectory-like or array-like
            Data to cluster; must support integer-array indexing and len().
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        n_samples = len(X)
        n_batches = int(np.ceil(float(n_samples) / self.batch_size))
        n_iter = int(self.max_iter * n_batches)
        random_state = check_random_state(self.random_state)
        # random initial medoids and labels
        # (random_integers is deprecated in modern numpy; kept for parity
        # with the msmbuilder implementation this was copied from)
        cluster_ids_ = random_state.random_integers(
            low=0, high=n_samples - 1, size=self.n_clusters)
        labels_ = random_state.random_integers(
            low=0, high=self.n_clusters - 1, size=n_samples)
        n_iters_no_improvement = 0
        for kk in range(n_iter):
            # each minibatch includes the random indices AND the
            # current cluster centers
            minibatch_indices = np.concatenate([
                cluster_ids_,
                random_state.random_integers(
                    0, n_samples - 1, self.batch_size),
            ])
            X_indices = np.array(minibatch_indices, dtype=np.intp)
            # BUGFIX: original read the undefined name `metric` (NameError at
            # runtime) and compared against the misspelling 'wmrsd'.
            if self.metric[0] == 'wrmsd':
                dmat = wrmsd_dmat(X[X_indices], weights=self.metric[1])
            else:
                dmat = libdistance.pdist(X, metric=self.metric,
                                         X_indices=X_indices)
            minibatch_labels = np.array(np.concatenate([
                np.arange(self.n_clusters),
                labels_[minibatch_indices[self.n_clusters:]]
            ]), dtype=np.intp)
            ids, inertia, _ = _kmedoids.kmedoids(
                self.n_clusters, dmat, 0, minibatch_labels,
                random_state=random_state)
            minibatch_labels, m = _kmedoids.contigify_ids(ids)
            # Copy back the new cluster_ids_ for the centers
            minibatch_cluster_ids = np.array(
                sorted(m.items(), key=itemgetter(1)))[:, 0]
            cluster_ids_ = minibatch_indices[minibatch_cluster_ids]
            # Copy back the new labels for the elements
            n_changed = np.sum(labels_[minibatch_indices] != minibatch_labels)
            if n_changed == 0:
                n_iters_no_improvement += 1
            else:
                labels_[minibatch_indices] = minibatch_labels
                n_iters_no_improvement = 0
            if n_iters_no_improvement >= self.max_no_improvement:
                break
        self.cluster_ids_ = cluster_ids_
        self.cluster_centers_ = X[cluster_ids_]
        # NOTE(review): assign_nearest only understands string metrics, so a
        # ('wrmsd', weights) tuple will fail here — the final assignment still
        # needs a weighted code path. TODO confirm and extend.
        self.labels_, self.inertia_ = libdistance.assign_nearest(
            X, self.cluster_centers_, metric=self.metric)
        return self

    def predict(self, X):
        """Predict the closest cluster each sample in X belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            New data to predict.

        Returns
        -------
        Y : array, shape [n_samples,]
            Index of the closest center each sample belongs to.
        """
        labels, _ = libdistance.assign_nearest(
            X, self.cluster_centers_, metric=self.metric)
        return labels
In [18]:
import mdtraj as md
# md.  -- incomplete expression left mid-typing; remove (mdtraj is already imported above)
In [8]:
# Repeat the atomwise-deviation calculation over a range of lag times,
# keyed by tau, to see how the per-atom fluctuation profile depends on lag.
atomwise_deviations = dict()
for tau in list(range(1,10)) + [25,50,100]:
    print(tau)  # progress indicator: one line per lag time
    n_frames=len(fs_t)-tau
    n_atoms = fs_t[0].n_atoms
    atomwise_deviations[tau]=np.zeros((n_frames,n_atoms))
    for i in range(n_frames):
        atomwise_deviations[tau][i] = compute_atomwise_deviation(fs_t[i],fs_t[i+tau])
In [99]:
mean = np.mean(atomwise_deviations[1],0)
stdev = np.std(atomwise_deviations[1],0)
plt.plot(mean,c='green')
plt.plot(mean+stdev,c='blue',linestyle='--')
plt.plot(mean-stdev,c='blue',linestyle='--')
Out[99]:
In [100]:
plt.plot(1.0/mean)
Out[100]:
In [101]:
plt.scatter(mean,stdev)
Out[101]:
In [102]:
means = np.array([np.mean(d,0) for d in atomwise_deviations.values()]).T
In [ ]:
plt.plot(np.arange(len(means))+1,means[:,0],label='Lag-time: 50ps');
#plt.plot(np.arange(len(means))+1,means[:,-1],label='Lag-time: 5000ps');
plt.xlabel('Residue #')
plt.ylabel('Mean distance (nm)')
plt.title('Fs Peptide')
plt.legend(loc='best',frameon=False)
In [107]:
plt.plot(np.arange(len(means))+1,means[:,0],label='Lag-time: 50 ps');
plt.plot(np.arange(len(means))+1,means[:,-1],label='Lag-time: 5 ns');
plt.xlabel('Atom #')
plt.ylabel('Mean distance (nm)')
plt.title('Fs Peptide')
plt.legend(loc='best',frameon=False)
Out[107]:
In [ ]:
In [ ]:
fs_trajectories
In [75]:
s = sorted(np.arange(len(means)),key=lambda i:means[i][-1])
In [79]:
plt.plot(means[s]);
plt.xlabel('Atom (sorted)')
In [85]:
plt.plot(means[:,[0,4,8,9,10,11]][s]);
In [80]:
means.shape
Out[80]:
In [84]:
means[:,[0,4,8,9,10,11]].shape
Out[84]:
In [90]:
print(FsPeptide.description())
In [127]:
atoms,_ = fs_t.topology.to_dataframe()
atoms.element
Out[127]:
In [129]:
a = np.array(atoms.element)
a
Out[129]:
In [136]:
atom_color_dict = {'C':0,'H':1,'O':2,'N':3}
atom_colors = np.array([atom_color_dict[e] for e in a])
In [ ]:
In [139]:
plt.scatter(np.arange(len(means))+1,means[:,0],#label='Lag-time: 50 ps',
c=atom_colors);
plt.xlabel('Atom #')
plt.ylabel('Mean distance (nm)')
plt.title('Fs Peptide')
plt.colorbar()
#plt.legend(loc='best',frameon=False)
Out[139]:
In [141]:
m = means[:,0]
In [142]:
m.shape
Out[142]:
In [144]:
for atom in atom_color_dict:
print(atom,np.mean(m[a==atom]))
In [190]:
weights = 1/means[:,0]
weights.shape
Out[190]:
In [150]:
from msmbuilder.cluster import MiniBatchKMedoids
In [152]:
kmedoids = MiniBatchKMedoids(metric='rmsd')
In [166]:
X_ = kmedoids.fit_transform(fs_t)
In [167]:
X_ = np.array(X_)[:,0]
X_.shape
Out[167]:
In [168]:
from msmbuilder import msm
m = msm.MarkovStateModel()
In [169]:
m.fit(X_)
Out[169]:
In [176]:
print(m.summarize())
In [173]:
plt.imshow(m.transmat_,interpolation='none',cmap='Blues')
plt.colorbar()
Out[173]:
In [177]:
n=5000
t = fs_t[:n]
In [180]:
rmsd = np.zeros((n,n))
for i in range(n):
rmsd[i,i:] = mdtraj.rmsd(t[i:n],t[i])
In [206]:
rmsd = rmsd + rmsd.T
In [207]:
plt.imshow(rmsd,interpolation='none',cmap='Blues')
Out[207]:
In [ ]:
In [201]:
def wRMSD(X, Y, w='unweighted'):
    """Weighted RMSD-style distance between two single-frame trajectories.

    Parameters
    ----------
    X, Y : mdtraj.Trajectory (single frame each)
        Conformations to compare.
    w : 'unweighted' or np.ndarray, shape (n_atoms,)
        Per-atom weights; the default sums the deviations unweighted.

    Returns
    -------
    float
    """
    dev = compute_atomwise_deviation(X, Y)
    # BUGFIX: the original tested `w == 'unweighted'` directly, which is an
    # elementwise comparison (FutureWarning, dtype-dependent result) when w
    # is an ndarray. Test the type explicitly instead.
    if isinstance(w, str) and w == 'unweighted':
        wdev = sum(dev)
    else:
        wdev = w.dot(dev)
    # NOTE(review): len(X.xyz.T) is the number of spatial dimensions (3), not
    # the number of atoms — X.n_atoms was probably intended. Preserved as-is
    # so existing results stay comparable; confirm before relying on scale.
    return np.sqrt(wdev) / len(X.xyz.T)
In [ ]:
In [202]:
wRMSD(t[0],t[1])
Out[202]:
In [204]:
wRMSD(t[0],t[1],weights)
Out[204]:
In [209]:
wrmsd_mat = np.zeros((n,n))
for i in range(len(t)):
for j in range(i):
wrmsd_mat[i,j] = wRMSD(t[i],t[j],weights)
if i%10==0:
print(i)
In [222]:
wrmsd_subset = wrmsd_mat[:500,:500] + wrmsd_mat[:500,:500].T
rmsd_subset = rmsd[:500,:500]
In [230]:
plt.imshow(np.log(wrmsd_subset),cmap='Blues',interpolation='none')
plt.colorbar()
Out[230]:
In [232]:
plt.imshow(np.log(rmsd_subset+1),cmap='Blues',interpolation='none')
plt.colorbar()
Out[232]:
In [220]:
plt.hist(rmsd.flatten(),bins=100);
In [227]:
plt.hist(rmsd_subset.flatten(),bins=100);
plt.figure()
plt.hist(wrmsd_subset.flatten(),bins=100);
In [203]:
dev = compute_atomwise_deviation(t[0],t[1])
dev.shape
Out[203]:
In [ ]:
# mdtraj.rmsd(  -- incomplete call left mid-typing (SyntaxError on a fresh run); remove or complete
In [248]:
from msmbuilder import featurizer
rpf = featurizer.RawPositionsFeaturizer()
rpft = rpf.fit_transform(t.center_coordinates())
In [250]:
rpft[0].shape
Out[250]:
In [253]:
np.reshape(rpft,(len(t),n_atoms,3)).shape
Out[253]:
In [255]:
rpft = np.reshape(rpft,(len(t),n_atoms,3))
In [257]:
dev_mat = np.zeros((n,n,n_atoms))
for i in range(len(t)):
for j in range(i):
dev_mat[i,j] = compute_atomwise_deviation_xyz(rpft[i],rpft[j])
if i%100==0:
print(i)
In [258]:
n=2000
dev_mat = dev_mat[:n,:n]
In [263]:
dev_mat[0,:].shape
Out[263]:
In [281]:
weights/=weights.sum()
weights.sum()
Out[281]:
In [282]:
weighted_rmsd = dev_mat.dot(weights)
weighted_rmsd = weighted_rmsd+weighted_rmsd.T
In [267]:
plt.imshow(np.log(weighted_rmsd),cmap='Blues',interpolation='none')
plt.colorbar()
Out[267]:
In [272]:
plt.imshow(np.log(rmsd[:2000,:2000]+10),cmap='Blues',interpolation='none')
Out[272]:
In [277]:
plt.scatter(rmsd[:2000,:2000].flatten(),weighted_rmsd.flatten(),alpha=0.1,linewidths=0)
Out[277]:
In [241]:
from numpy.linalg import det
def BC(X, Y):
    """Binet-Cauchy similarity between two (n_atoms, 3) coordinate matrices.

    Returns det(X^T Y) normalized by sqrt(det(X^T X) * det(Y^T Y)); equals 1
    when X == Y (for nonsingular Gram matrices).
    """
    return det(X.T.dot(Y)) / np.sqrt(det(X.T.dot(X)) * det(Y.T.dot(Y)))

def BC_w(X, Y, M=None):
    """Weighted Binet-Cauchy similarity with diagonal weight matrix M.

    Parameters
    ----------
    X, Y : np.ndarray, shape (n_atoms, 3)
    M : np.ndarray, shape (n_atoms, n_atoms), optional
        Weight matrix; defaults to the identity (reduces to plain BC).
    """
    # BUGFIX: the original used `M == None`, which is an elementwise
    # comparison for ndarrays (truth test raises ValueError in modern numpy).
    # Identity comparison with `is None` is the correct test.
    if M is None:
        M = np.diag(np.ones(len(X)))
    return det(X.T.dot(M).dot(Y)) / np.sqrt(det(X.T.dot(M).dot(X)) * det(Y.T.dot(M).dot(Y)))
In [284]:
n
Out[284]:
In [286]:
%%prun
bc_mat = np.zeros((n,n))
for i in range(n):
for j in range(i):
bc_mat[i,j] = BC(rpft[i],rpft[j])
if i % 100==0:
print(i)
In [289]:
wbc_mat = np.zeros((n,n))
weight_matrix = np.diag(weights)
for i in range(n):
for j in range(i):
wbc_mat[i,j] = BC_w(rpft[i],rpft[j],weight_matrix)
if i % 100==0:
print(i)
In [314]:
plt.imshow(bc_mat)
plt.figure()
plt.imshow(wbc_mat)
Out[314]:
In [293]:
plt.scatter(wbc_mat.flatten(),bc_mat.flatten(),alpha=0.01,linewidths=0)
Out[293]:
In [309]:
plt.plot(weights)
Out[309]:
In [301]:
plt.plot(weights**-3)
Out[301]:
In [308]:
plt.plot(np.exp(-weights))
Out[308]:
In [246]:
%prun BC(t[0].xyz[0],t[1].xyz[0])
In [240]:
%prun [compute_atomwise_deviation_xyz(t[0].xyz[0],t[1].xyz[0]) for i in range(1000)]
In [239]:
t[0].xyz[0]
Out[239]:
In [188]:
weights.shape
Out[188]:
In [ ]:
mini_medoids = MiniBatchKMedoids(metric=RMSD)
In [313]:
plt.imshow(wrmsd_subset)
Out[313]:
In [151]:
from sklearn.cluster import MiniBatchKMeans  # fixed: trailing comma was a SyntaxError
In [ ]:
# kmeans = MiniBatchKMeans(  -- incomplete call left mid-typing (SyntaxError on a fresh run); remove or complete