In [37]:
from msmbuilder.msm import MarkovStateModel
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.featurizer import RawPositionsFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.dataset import dataset
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import KFold
from sklearn.grid_search import RandomizedSearchCV
from scipy.stats.distributions import randint
from msmbuilder.cluster import MiniBatchKMeans
import matplotlib.pyplot as pp
import numpy as np
from mdtraj.utils import timing
import mdtraj as md
%matplotlib inline
from matplotlib import pyplot as plt
In [38]:
# Trajectory glob and topology file for the alanine dipeptide data set.
ala_traj_glob = "/Users/je714/msmbuilder_data/alanine_dipeptide/*.dcd"
ala_topology = "/Users/je714/msmbuilder_data/alanine_dipeptide/ala2.pdb"

# Build the dataset inside mdtraj's `timing` context manager.
with timing("Loading alanine as dataset..."):
    xyz_ala = dataset(ala_traj_glob, topology=ala_topology)
In [39]:
# Dihedral types handed to the featurizer: backbone phi and psi.
backbone_dihedral_types = ['phi', 'psi']

# Fit the dihedral featurizer on the loaded dataset and transform it in one step.
dihed_feat = DihedralFeaturizer(types=backbone_dihedral_types)
diheds = dihed_feat.fit_transform(xyz_ala)
In [43]:
# Featurize the same dataset with RawPositionsFeaturizer, constructed with its
# default arguments.
# NOTE(review): no reference structure is passed here, so the coordinates are
# presumably not superposed before featurization — confirm this is intended.
rawpos_feat = RawPositionsFeaturizer()
rawpos = rawpos_feat.fit_transform(xyz_ala)
In [60]:
# Number of tICA components to keep. plot_tica indexes components 0-3, so at
# least four are required.
N_TICA_COMPONENTS = 4

# Use one tICA estimator per feature set. Refitting a single shared instance
# on the second feature set replaces the fit from the first, which would leave
# no fitted model corresponding to the dihedral projection.
tica_model_diheds = tICA(n_components=N_TICA_COMPONENTS)
tica_trajs_diheds = tica_model_diheds.fit_transform(diheds)

tica_model_rawpos = tICA(n_components=N_TICA_COMPONENTS)
tica_trajs_rawpos = tica_model_rawpos.fit_transform(rawpos)

# Backward-compatible alias: `tica_model` used to end up fitted on the
# raw-position features (the last fit_transform call), so keep that binding.
tica_model = tica_model_rawpos
In [61]:
def plot_tica(tica_traj):
    """Draw log-scaled hexbin plots of the first four tICA components.

    All arrays in ``tica_traj`` are joined with ``np.concatenate`` and shown in
    a two-panel figure: columns 0 vs 1 on the left, columns 2 vs 3 on the right.

    Parameters
    ----------
    tica_traj : sequence of array-like
        Projected trajectories. After concatenation the result is indexed as a
        2-D array and must have at least four columns.

    Returns
    -------
    None
        The figure is created through the pyplot state machine.
    """
    stacked = np.concatenate(tica_traj)

    # (x column, y column, x label, y label) for each panel, left to right.
    panels = (
        (0, 1, 'tIC 1', 'tIC 2'),
        (2, 3, 'tIC 3', 'tIC 4'),
    )

    plt.figure(figsize=(14, 4))
    for position, (x_col, y_col, x_label, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.hexbin(stacked[:, x_col], stacked[:, y_col], bins='log', mincnt=1)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        colorbar = plt.colorbar()
        colorbar.set_label('log10(N)')
    plt.tight_layout()
In [106]:
# One figure per feature set: dihedral-based projection first, then raw positions.
for projected_trajs in (tica_trajs_diheds, tica_trajs_rawpos):
    plot_tica(projected_trajs)
In [109]:
# Mini-batch k-means is stochastic; a fixed seed makes the cluster assignments
# reproducible across kernel restarts.
CLUSTER_SEED = 42
N_CLUSTERS = 500

# Cluster the dihedral-based tICA projection.
clusterer = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=CLUSTER_SEED)
clustered_trajs_diheds = clusterer.fit_transform(tica_trajs_diheds)
In [ ]:
In [ ]: