In [ ]:
# Download example dataset
from msmbuilder.example_datasets import FsPeptide
# Create the dataset handle, then pull the trajectories out of whatever
# `get()` returns (it exposes them as a `trajectories` attribute).
fs_peptide = FsPeptide()
dataset = fs_peptide.get()
xyz = dataset.trajectories

# Print the description text that ships with the dataset.
print(fs_peptide.description())
Since the data was saved at 50 ps / frame, we keep only every 10th frame, which gives a new timestep of 0.5 ns / frame (i.e. 2 frames per ns).
In [ ]:
# Keep every 10th frame of each trajectory.
# NOTE: this rebinds `xyz`, so re-running this cell subsamples the
# already-subsampled trajectories a second time.
xyz = [traj[::10] for traj in xyz]
print("{} trajectories".format(len(xyz)))

# msmbuilder does not keep track of units! You must keep track of your
# data's timestep
to_ns = 0.5  # factor applied to a frame count below to report ns

# Collect the distinct trajectory lengths so equal lengths print once.
lengths_ns = {len(traj) * to_ns for traj in xyz}
print("with length {} ns".format(lengths_ns))
In [ ]:
from sklearn.pipeline import Pipeline
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.preprocessing import RobustScaler
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
# Stages are applied in the order listed. The string in each pair is the
# name under which the stage can be retrieved later via
# `pipeline.named_steps`.
steps = [
    ('feat', DihedralFeaturizer()),
    ('scaler', RobustScaler()),
    ('tica', tICA()),
    # Fixed random_state so the clustering is reproducible between runs.
    ('cluster', MiniBatchKMeans(n_clusters=100, random_state=42)),
    ('msm', MarkovStateModel()),
]
pipeline = Pipeline(steps)
In [ ]:
# Fit the whole pipeline on the trajectories held in `xyz`; the name is
# rebound to whatever `fit` returns.
pipeline = pipeline.fit(xyz)
In [ ]:
# Retrieve the stage registered under the name 'msm' (the
# MarkovStateModel instance placed in the pipeline).
msm = pipeline.named_steps['msm']
In [ ]:
%matplotlib inline
from matplotlib import pyplot as plt
# Scatter column 1 of the left eigenvectors against column 2.
# Marker area follows the populations (scaled up by 1e4 so the markers are
# visible) and marker colour follows column 1.
# NOTE(review): presumably one marker per MSM state -- confirm against the
# msmbuilder docs for `left_eigenvectors_` / `populations_`.
left_vecs = msm.left_eigenvectors_
x_coord = left_vecs[:, 1]
y_coord = left_vecs[:, 2]
marker_sizes = msm.populations_ * 1e4

plt.scatter(x_coord, y_coord, s=marker_sizes, c=x_coord, cmap='coolwarm')
plt.tight_layout()
In [ ]:
import msmexplorer as msme
# Plot the five slowest implied timescales of the fitted MSM.
#
# The model was fit on frame indices, and msmbuilder reports `timescales_`
# in units of the time step between input frames, not in ns. No conversion
# is applied here, so the axis is labelled in frames and the frame duration
# (`to_ns`, defined when the trajectories were subsampled) is shown for
# reference. The previous label claimed ns, which misstated the plotted
# values by a factor of 1 / to_ns.
msme.plot_timescales(
    msm,
    n_timescales=5,
    ylabel='Implied Timescales (frames; 1 frame = {} ns)'.format(to_ns),
)