Related work: "Low-dimensional, free-energy landscapes of protein-folding reactions by nonlinear dimensionality reduction" (Das et al., 2006)
Questions:
In [8]:
from __future__ import print_function
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
from msmbuilder.decomposition import tICA
from msmbuilder.example_datasets import fetch_met_enkephalin
from msmbuilder.featurizer import AtomPairsFeaturizer
from sklearn.pipeline import Pipeline
%matplotlib inline
In [9]:
dataset = fetch_met_enkephalin()
print(dataset.DESCR)
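A quick sanity check on what was downloaded can help here (a sketch; the exact trajectory count and lengths depend on the dataset version):

print(len(dataset.trajectories), 'trajectories')
print(dataset.trajectories[0])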
In [10]:
def fit_and_plot(pipeline, trajectories):
    transformed = pipeline.fit_transform(trajectories)
    transformed = np.concatenate(transformed)
    print('Eigenvalue sum', pipeline.named_steps['tica'].eigenvalues_.sum())
    x = transformed[:, 0]
    y = transformed[:, 1]
    plt.axes(facecolor='w')  # 'axisbg' was renamed to 'facecolor' in matplotlib 2.0
    plt.grid(False)
    plt.hist2d(x, y, bins=100, cmap='hot_r', norm=LogNorm())
    plt.title('Heatmap (log color scale)')
    plt.colorbar()
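Das et al. frame these 2D projections as free-energy landscapes; a minimal sketch of turning the same histogram into a free energy F/kT = -ln P (units of kT, additive constant ignored), reusing already-transformed coordinates:

def plot_free_energy(x, y, bins=100):
    # histogram counts -> probabilities -> free energy in units of kT
    counts, xedges, yedges = np.histogram2d(x, y, bins=bins)
    prob = counts / counts.sum()
    free_energy = -np.log(prob, out=np.full_like(prob, np.nan), where=prob > 0)
    plt.contourf(0.5 * (xedges[1:] + xedges[:-1]),
                 0.5 * (yedges[1:] + yedges[:-1]),
                 free_energy.T, 20, cmap='hot_r')
    plt.colorbar(label='F / kT')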
In [11]:
from itertools import combinations
heavy_atoms = dataset.trajectories[0].topology.select_atom_indices('heavy')
heavy_pairs = list(combinations(heavy_atoms, 2))
pipeline1 = Pipeline([
    ('feat', AtomPairsFeaturizer(heavy_pairs)),
    ('tica', tICA(gamma=0, n_components=2)),
    # no MSM step here: MarkovStateModel needs discrete state labels,
    # so it cannot be fit directly on the continuous tICA output
])
fit_and_plot(pipeline1, dataset.trajectories)
In [12]:
from msmbuilder.msm import MarkovStateModel
In [13]:
from sklearn import manifold
In [14]:
im = manifold.Isomap()  # sklearn defaults: n_neighbors=5, n_components=2
In [15]:
pipeline2 = Pipeline([
    ('feat', AtomPairsFeaturizer(heavy_pairs)),
    ('tica', tICA(n_components=2)),
])
In [16]:
pipeline3 = Pipeline([
    ('feat', AtomPairsFeaturizer(heavy_pairs)),
    ('tica', tICA(n_components=2)),
    # note: fitting this MSM step would require a clustering step between
    # tICA and MarkovStateModel (see the sketch after this cell)
    ('msm', MarkovStateModel()),
])
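pipeline3 is never fit below; for reference, a minimal sketch of a complete pipeline that an MSM step could actually be fit on, assuming msmbuilder's MiniBatchKMeans clusterer (the 100 states and lag time of 1 frame are illustrative choices, not values from this analysis):

from msmbuilder.cluster import MiniBatchKMeans

pipeline_msm = Pipeline([
    ('feat', AtomPairsFeaturizer(heavy_pairs)),
    ('tica', tICA(n_components=2)),
    ('cluster', MiniBatchKMeans(n_clusters=100)),  # discretize tICA space (illustrative)
    ('msm', MarkovStateModel(lag_time=1)),         # lag time in frames (illustrative)
])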
In [17]:
traj = dataset.trajectories
fit_and_plot(pipeline2, traj)
In [67]:
# keep all components: 780 = C(40, 2), one per heavy-atom pair feature
tica = tICA(n_components=780)
In [68]:
feat = AtomPairsFeaturizer(heavy_pairs)
featurized = feat.fit_transform(traj)
In [69]:
len(featurized)
Out[69]:
In [70]:
featurized[0].shape
Out[70]:
In [71]:
Y = tica.fit_transform(featurized)
In [72]:
len(Y)
Out[72]:
In [73]:
plt.plot(tica.eigenvalues_)
Out[73]:
In [74]:
tica_transform = tica.transform(featurized)
In [75]:
X_ = np.vstack(tica_transform)
X_.shape
Out[75]:
In [76]:
from sklearn.decomposition import PCA
In [77]:
pca = PCA()
In [78]:
pca.fit(X_)
Out[78]:
In [79]:
plt.plot(pca.explained_variance_ratio_)
Out[79]:
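One way to read this spectrum is the cumulative explained variance; a short sketch (the 95% line is just an illustrative threshold):

plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.axhline(0.95, linestyle='--')  # illustrative cutoff
plt.xlabel('number of PCA components')
plt.ylabel('cumulative explained variance')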
In [81]:
X_pca = pca.transform(X_)
plt.hist2d(X_pca[:, 0], X_pca[:, 1], bins=50);
In [83]:
# X_ already holds the tICA-transformed frames, so plot its first two
# components directly (passing it through tica.transform again would
# double-transform the data, and transform returns a list, not an array)
plt.hist2d(X_[:, 0], X_[:, 1], bins=50);
In [30]:
plt.plot(tica.timescales_)
Out[30]:
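The tICA timescales depend on the lag time used to estimate the time-lagged covariances; a sketch of re-fitting with an explicit lag_time to check sensitivity (the value of 10 frames is an illustrative assumption, not from the original run):

tica_lag = tICA(n_components=780, lag_time=10)  # lag_time is illustrative
tica_lag.fit(featurized)
plt.plot(tica_lag.timescales_)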
In [28]:
len(Y)
Out[28]:
In [22]:
Y[0].shape
Out[22]:
In [23]:
Y_stack = np.vstack(Y)
In [24]:
# note: the transpose means Isomap embeds the 780 tICA component axes as
# points, not the trajectory frames
Y_im_t = im.fit_transform(Y_stack.T)
plt.hist2d(Y_im_t[:, 0], Y_im_t[:, 1], bins=50);
In [60]:
plt.scatter(Y_im_t[:,0],Y_im_t[:,1]);
In [33]:
featurized_stack = np.vstack(featurized)
In [38]:
# again embedding the transpose: the 780 atom-pair features are the samples here
Y_im = im.fit_transform(featurized_stack.T)
In [39]:
Y_im.shape
Out[39]:
In [41]:
plt.hist2d(Y_im[:,0],Y_im[:,1],bins=50);
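The Das et al. approach embeds the conformations themselves. Embedding frames rather than feature axes with Isomap is possible here too, but the cost grows roughly quadratically with the number of samples, so this sketch uses a strided subsample (the stride and neighbour count are illustrative assumptions):

from sklearn.manifold import Isomap

stride = 10  # illustrative: thin the frames to keep Isomap tractable
frames = featurized_stack[::stride]
iso_frames = Isomap(n_neighbors=15, n_components=2)
frame_embedding = iso_frames.fit_transform(frames)
plt.hist2d(frame_embedding[:, 0], frame_embedding[:, 1], bins=50);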