In [1]:
import mdtraj as md
import numpy as np
import numpy as np
from matplotlib import pyplot as plt
from mdtraj.utils import timing
import msmbuilder as mb
from msmbuilder.example_datasets import load_doublewell
from msmbuilder.cluster import NDGrid
from msmbuilder.msm import BayesianMarkovStateModel, MarkovStateModel
%matplotlib inline
In [2]:
trjs = load_doublewell(random_state=0)['trajectories']
plt.hist(np.concatenate(trjs), bins=50, log=True)
plt.ylabel('Frequency')
plt.show()
In [3]:
from __future__ import print_function
import os
from matplotlib.pyplot import *
from msmbuilder.featurizer import SuperposeFeaturizer
from msmbuilder.example_datasets import AlanineDipeptide
from msmbuilder.hmm import GaussianFusionHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel
In [4]:
print(AlanineDipeptide.description())
dataset = AlanineDipeptide().get()
trajectories = dataset.trajectories
topology = trajectories[0].topology
indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']]
featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
sequences = featurizer.transform(trajectories)
In [5]:
len(sequences)
Out[5]:
In [6]:
lag_times = [1, 10, 20, 30, 40]
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [3, 5]
for n in n_states:
hmm_ts0[n] = []
hmm_ts1[n] = []
for lag_time in lag_times:
strided_data = [s[i::lag_time] for s in sequences for i in range(lag_time)]
hmm = GaussianFusionHMM(n_states=n, n_features=sequences[0].shape[1], n_init=1).fit(strided_data)
timescales = hmm.timescales_ * lag_time
hmm_ts0[n].append(timescales[0])
hmm_ts1[n].append(timescales[1])
print('n_states=%d\tlag_time=%d\ttimescales=%s' % (n, lag_time, timescales))
print()
In [7]:
figure(figsize=(14,3))
for i, n in enumerate(n_states):
subplot(1,len(n_states),1+i)
plot(lag_times, hmm_ts0[n])
plot(lag_times, hmm_ts1[n])
if i == 0:
ylabel('Relaxation Timescale')
xlabel('Lag Time')
title('%d states' % n)
show()
In [8]:
hmm.score(sequences)
Out[8]:
In [9]:
me_data = mb.example_datasets.MetEnkephalin().get()
In [10]:
fs_data = mb.example_datasets.FsPeptide().get()
In [11]:
dataset = fs_data
In [12]:
trajectories = dataset.trajectories
topology = trajectories[0].topology
indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']]
featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
sequences = featurizer.transform(trajectories)
In [13]:
len(sequences)
Out[13]:
In [14]:
sum([len(traj) for traj in sequences])
Out[14]:
In [15]:
sequences[0].shape
Out[15]:
In [17]:
hmm
Out[17]:
In [18]:
hmm.params
Out[18]:
In [19]:
hmm.vars_.shape
Out[19]:
In [20]:
hmm.vars_
Out[20]:
In [21]:
hmm.transmat_
Out[21]:
In [22]:
import pylab as pl
%matplotlib inline
pl.imshow(hmm.transmat_,interpolation='none')
Out[22]:
In [27]:
c=mb.cluster.KMedoids(n_clusters=100)
In [ ]:
c.fit(sequences)
In [ ]: