In this example, we plot a 2D projection of a protein dynamics dataset computed with `tICA`.
The example also demonstrates the use of a `Pipeline` object to combine a featurizer (`AtomPairsFeaturizer`) and an estimator (`tICA`).
In [ ]:
from __future__ import print_function
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
from msmbuilder.decomposition import tICA
from msmbuilder.example_datasets import fetch_met_enkephalin
from msmbuilder.featurizer import AtomPairsFeaturizer
from sklearn.pipeline import Pipeline
In [ ]:
# Obtain the Met-enkephalin example dataset and print its DESCR attribute.
# `dataset.trajectories` is used by the cells below.
dataset = fetch_met_enkephalin()
print(dataset.DESCR)
In [ ]:
def fit_and_plot(pipeline, trajectories):
    """Fit ``pipeline`` on ``trajectories`` and draw a 2D heatmap of the result.

    The pipeline is fitted and applied in one step, the per-trajectory
    outputs are stacked, the sum of the tICA eigenvalues is printed, and a
    log-scaled 2D histogram of the first two output columns is plotted on
    the current matplotlib figure.

    Parameters
    ----------
    pipeline : sklearn.pipeline.Pipeline
        Must contain a step named ``'tica'`` whose fitted estimator exposes
        an ``eigenvalues_`` array, and must produce at least two output
        columns.
    trajectories : sequence
        Passed unchanged to ``pipeline.fit_transform``.

    Returns
    -------
    None
        The function is used for its side effects (printing and plotting).
    """
    # fit_transform returns a sequence of arrays (presumably one per
    # trajectory); stack them along the first axis so all frames are
    # histogrammed together.
    transformed = np.concatenate(pipeline.fit_transform(trajectories))
    print('Eigenvalue sum', pipeline.named_steps['tica'].eigenvalues_.sum())

    x = transformed[:, 0]
    y = transformed[:, 1]

    # The ``axisbg`` keyword of ``plt.axes`` was removed from matplotlib, so
    # the white background is set on the axes patch instead; this works on
    # both old and current matplotlib releases.
    plt.axes().patch.set_facecolor('w')
    plt.grid(False)
    # LogNorm puts the bin counts on a logarithmic colour scale.
    plt.hist2d(x, y, bins=100, cmap='hot_r', norm=LogNorm())
    plt.xlabel('1st tIC')
    plt.ylabel('2nd tIC')
    plt.title('tICA Heatmap (log color scale)')
    plt.colorbar()
In [ ]:
from itertools import combinations

# Enumerate every unordered pair of heavy (non-hydrogen) atoms, using the
# topology of the first trajectory as the reference.
reference_topology = dataset.trajectories[0].topology
heavy_atoms = reference_topology.select_atom_indices('heavy')
heavy_pairs = list(combinations(heavy_atoms, 2))

# Chain the atom-pair featurizer into a two-component tICA model, then fit
# the pipeline and plot the resulting projection.
pair_featurizer = AtomPairsFeaturizer(heavy_pairs)
tica_model = tICA(n_components=2)
pipeline1 = Pipeline([('feat', pair_featurizer), ('tica', tica_model)])
fit_and_plot(pipeline1, dataset.trajectories)
In [ ]: