In [ ]:
# Download example dataset
from msmbuilder.example_datasets import FsPeptide
fs_peptide = FsPeptide()
fs_peptide.cache()
In [ ]:
# Work in a temporary directory
import tempfile
import os
os.chdir(tempfile.mkdtemp())
In [ ]:
from msmbuilder.dataset import dataset
xyz = dataset(fs_peptide.data_dir + "/*.xtc",
topology=fs_peptide.data_dir + '/fs_peptide.pdb')
len(xyz)
The raw (x, y, z)
coordinates from the simulation do not respect the translational and rotational symmetry of our problem. A Featurizer
transforms cartesian coordinates into other representations. Here we use the DihedralFeaturizer
to turn our data into phi and psi dihedral angles. Observe that the 264*3
-dimensional space is reduced to 84 dimensions
In [ ]:
from msmbuilder.featurizer import DihedralFeaturizer
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')
print(xyz[0].xyz.shape)
print(diheds[0].shape)
In [ ]:
from msmbuilder.decomposition import tICA
tica_model = tICA(lag_time=1, n_components=4)
# fit and transform can be done in seperate steps:
tica_model = diheds.fit_with(tica_model)
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')
print(diheds[0].shape)
print(tica_trajs[0].shape)
In [ ]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
txx = np.concatenate(tica_trajs)
plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1)
In [ ]:
from msmbuilder.cluster import MiniBatchKMeans
clusterer = MiniBatchKMeans(n_clusters=100)
clustered_trajs = tica_trajs.fit_transform_with(clusterer,
'kmeans/',
fmt='dir-npy')
print(tica_trajs[0].shape)
print(clustered_trajs[0].shape)
In [ ]:
plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1)
plt.scatter(clusterer.cluster_centers_[:,0],
clusterer.cluster_centers_[:,1],
s=100, c='w')
In [ ]:
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import dump
msm = MarkovStateModel(lag_time=5)
msm.fit(clustered_trajs)
In [ ]:
plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1, cmap="Greys")
plt.scatter(clusterer.cluster_centers_[:,0],
clusterer.cluster_centers_[:,1],
s=1e4 * msm.populations_, # size by population
c=msm.left_eigenvectors_[:,1], # color by eigenvector
cmap="RdBu")
plt.colorbar(label='First dynamical eigenvector')
plt.xlabel('tIC 1')
plt.ylabel('tIC 2')
plt.tight_layout()
In [ ]:
from msmbuilder.lumping import PCCAPlus
pcca = PCCAPlus.from_msm(msm, n_macrostates=5)
macro_trajs = pcca.transform(clustered_trajs)
In [ ]:
plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1, cmap="Greys")
plt.scatter(clusterer.cluster_centers_[:,0],
clusterer.cluster_centers_[:,1],
s=100,
c=pcca.microstate_mapping_,
)
plt.xlabel('tIC 1')
plt.ylabel('tIC 2')
In [ ]: