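In this example, we plot a 2D projection of a protein dynamics dataset computed with time-structure independent component analysis (tICA): a log-scaled heatmap of the trajectories projected onto the first two tICs. The example also demonstrates the use of a `Pipeline` object to combine a featurizer (`AtomPairsFeaturizer`) and an estimator (`tICA`).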


In [ ]:
from __future__ import print_function
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm

from msmbuilder.decomposition import tICA
from msmbuilder.example_datasets import fetch_met_enkephalin
from msmbuilder.featurizer import AtomPairsFeaturizer
from sklearn.pipeline import Pipeline

In [ ]:
# Load the met-enkephalin example dataset shipped via msmbuilder.example_datasets.
# `dataset.trajectories` is used by the later cells for featurization and fitting.
dataset = fetch_met_enkephalin()
# Show the dataset's DESCR attribute (presumably a human-readable description — confirm).
print(dataset.DESCR)

In [ ]:
def fit_and_plot(pipeline, trajectories):
    """Fit ``pipeline`` on ``trajectories`` and plot the first two output columns.

    The per-trajectory outputs of ``pipeline.fit_transform`` are concatenated
    into a single array, the sum of the fitted tICA eigenvalues is printed,
    and a log-scaled 2D histogram of column 0 against column 1 is drawn on a
    new axes of the current matplotlib figure.

    Parameters
    ----------
    pipeline : Pipeline
        Must contain a step named ``'tica'`` whose fitted estimator exposes
        an ``eigenvalues_`` attribute, and must produce at least two columns.
    trajectories : sequence
        Passed unchanged to ``pipeline.fit_transform``.

    Returns
    -------
    None
        The function is called for its side effects (printing and plotting).
    """
    transformed = np.concatenate(pipeline.fit_transform(trajectories))

    print('Eigenvalue sum', pipeline.named_steps['tica'].eigenvalues_.sum())

    x = transformed[:, 0]
    y = transformed[:, 1]

    # `axisbg` was deprecated in matplotlib 2.0 and later removed;
    # `facecolor` is the supported keyword for the axes background.
    plt.axes(facecolor='w')
    plt.grid(False)
    # LogNorm maps bin counts to colors on a log scale so sparsely populated
    # regions remain visible next to the densely populated ones.
    plt.hist2d(x, y, bins=100, cmap='hot_r', norm=LogNorm())
    plt.xlabel('1st tIC')
    plt.ylabel('2nd tIC')
    plt.title('tICA Heatmap (log color scale)')
    plt.colorbar()

In [ ]:
# Enumerate every unordered pair of heavy (non-hydrogen) atoms, using the
# topology of the first trajectory as the reference.
from itertools import combinations

reference_topology = dataset.trajectories[0].topology
heavy_atoms = reference_topology.select_atom_indices('heavy')
heavy_pairs = [pair for pair in combinations(heavy_atoms, 2)]

# Chain the atom-pair featurizer into a two-component tICA model.
feat_step = ('feat', AtomPairsFeaturizer(heavy_pairs))
tica_step = ('tica', tICA(n_components=2))
pipeline1 = Pipeline([feat_step, tica_step])

fit_and_plot(pipeline1, dataset.trajectories)

In [ ]: