In [ ]:
# Download example dataset
from msmbuilder.example_datasets import FsPeptide
fs_peptide = FsPeptide(verbose=False)
xyz = fs_peptide.get().trajectories
print(fs_peptide.description())
Since the data was saved at 50 ps / frame, we only load every 10th frame (with a new frequency of 0.5/ns).
In [ ]:
xyz = [t[::10] for t in xyz]
print("{} trajectories".format(len(xyz)))
# msmbuilder does not keep track of units! You must keep track of your
# data's timestep
to_ns = 0.5
print("with length {} ns".format(set(len(x)*to_ns for x in xyz)))
The raw (x, y, z)
coordinates from the simulation do not respect the translational and rotational symmetry of our problem. A Featurizer
transforms cartesian coordinates into other representations. Here we use the DihedralFeaturizer
to turn our data into phi and psi dihedral angles. Observe that the 264*3
-dimensional space is reduced to 84 dimensions
In [ ]:
from msmbuilder.featurizer import DihedralFeaturizer
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = featurizer.fit_transform(xyz)
print(xyz[0].xyz.shape)
print(diheds[0].shape)
In [ ]:
from msmbuilder.preprocessing import RobustScaler
scaler = RobustScaler()
scaled_diheds = scaler.fit_transform(diheds)
print(diheds[0].shape)
print(scaled_diheds[0].shape)
In [ ]:
from msmbuilder.decomposition import tICA
tica_model = tICA(lag_time=2, n_components=4)
# fit and transform can be done in seperate steps:
tica_model.fit(diheds)
tica_trajs = tica_model.transform(diheds)
print(diheds[0].shape)
print(tica_trajs[0].shape)
In [ ]:
%matplotlib inline
import msmexplorer as msme
import numpy as np
txx = np.concatenate(tica_trajs)
_ = msme.plot_histogram(txx)
In [ ]:
from msmbuilder.cluster import MiniBatchKMeans
clusterer = MiniBatchKMeans(n_clusters=100, random_state=42)
clustered_trajs = clusterer.fit_transform(tica_trajs)
print(tica_trajs[0].shape)
print(clustered_trajs[0].shape)
In [ ]:
from matplotlib import pyplot as plt
plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1, cmap='viridis')
plt.scatter(clusterer.cluster_centers_[:,0],
clusterer.cluster_centers_[:,1],
s=100, c='w')
In [ ]:
from msmbuilder.msm import MarkovStateModel
msm = MarkovStateModel(lag_time=2, n_timescales=20)
msm.fit(clustered_trajs)
In [ ]:
assignments = clusterer.partial_transform(txx)
assignments = msm.partial_transform(assignments)
msme.plot_free_energy(txx, obs=(0, 1), n_samples=10000,
pi=msm.populations_[assignments],
xlabel='tIC 1', ylabel='tIC 2')
plt.scatter(clusterer.cluster_centers_[msm.state_labels_, 0],
clusterer.cluster_centers_[msm.state_labels_, 1],
s=1e4 * msm.populations_, # size by population
c=msm.left_eigenvectors_[:, 1], # color by eigenvector
cmap="coolwarm",
zorder=3)
plt.colorbar(label='First dynamical eigenvector')
plt.tight_layout()
In [ ]:
msm.timescales_
In [ ]:
msme.plot_timescales(msm, n_timescales=5, ylabel='Implied Timescales ($ns$)')
In [ ]:
from msmbuilder.lumping import PCCAPlus
pcca = PCCAPlus.from_msm(msm, n_macrostates=4)
macro_trajs = pcca.transform(clustered_trajs)
In [ ]:
msme.plot_free_energy(txx, obs=(0, 1), n_samples=10000,
pi=msm.populations_[assignments],
xlabel='tIC 1', ylabel='tIC 2')
plt.scatter(clusterer.cluster_centers_[msm.state_labels_, 0],
clusterer.cluster_centers_[msm.state_labels_, 1],
s=50,
c=pcca.microstate_mapping_,
zorder=3
)
plt.xlabel('tIC 1')
plt.ylabel('tIC 2')
plt.tight_layout()