In [3]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mdtraj.utils import timing

from msmbuilder.example_datasets import QuadWell
from msmbuilder.msm import BayesianMarkovStateModel
from msmbuilder.msm import ContinuousTimeMSM

In [4]:
import mdtraj as md
trajs = [md.load('bound_trajs/liganded_1.pdb'), md.load('bound_trajs/liganded_2.pdb'), 
         md.load('bound_trajs/liganded_3.pdb'), md.load('bound_trajs/liganded_4.pdb'), 
         md.load('bound_trajs/liganded_5.pdb'), md.load('bound_trajs/liganded_6.pdb'), 
         md.load('bound_trajs/liganded_7.pdb'), md.load('bound_trajs/liganded_8.pdb'), 
         md.load('bound_trajs/liganded_9.pdb'), md.load('bound_trajs/liganded_10.pdb'), 
         md.load('bound_trajs/liganded_11.pdb'), md.load('bound_trajs/liganded_12.pdb'), 
         md.load('bound_trajs/liganded_13.pdb')] 
top = trajs[0].topology
ref = md.load('bound_trajs/bound.pdb')

In [13]:
from msmbuilder.cluster import NDGrid
n_states = 25
lag_time = 10
seqs = NDGrid(n_states).fit_transform(trajs)
print(seqs[0])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-bb435a071106> in <module>()
      2 n_states = 25
      3 lag_time = 10
----> 4 seqs = NDGrid(n_states).fit_transform(trajs)
      5 print(seqs[0])

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/cluster/base.pyc in fit_transform(self, sequences, y)
    171     def fit_transform(self, sequences, y=None):
    172         """Alias for fit_predict"""
--> 173         return self.fit_predict(sequences, y)

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/cluster/base.pyc in fit_predict(self, sequences, y)
    151         """
    152         if hasattr(super(MultiSequenceClusterMixin, self), 'fit_predict'):
--> 153             check_iter_of_sequences(sequences, allow_trajectory=self._allow_trajectory)
    154             labels = super(MultiSequenceClusterMixin, self).fit_predict(sequences)
    155         else:

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/utils/validation.pyc in check_iter_of_sequences(sequences, allow_trajectory, ndim, max_iter)
     53 
     54     if not value:
---> 55         raise ValueError('sequences must be a list of sequences')
     56 
     57 

ValueError: sequences must be a list of sequences

In [10]:
print sequences[0]


[[ 13.77501011  13.7850132   13.77760887 ...,   0.9748736    1.04400492
    1.1160593 ]
 [ 13.89085484  13.90155983  13.85965252 ...,   1.01191211   1.10150385
    1.17012   ]
 [ 14.04298592   0.63666368  14.07787132 ...,   0.96659875   1.00243461
    1.03516865]
 ..., 
 [ 13.70102119  13.76448727  13.88113403 ...,   1.53108907  13.7317276
   13.50222015]
 [  0.55969614   0.51739442   0.63559663 ...,   1.31514835  13.92131805
    1.05461752]
 [ 13.88770676   0.64290243   0.78151983 ...,   1.30341554   1.16541195
    1.07118559]]

In [11]:
n_states = 25
lag_time = 10
with timing('ContinuousTimeMSM'):
    rates_model = ContinuousTimeMSM(lag_time=lag_time,).fit(sequences)
with timing('BayesianMarkovStateModel'):
    # generating 1000 samples from the distribution, using 2e4 steps between samples
    # to ensure that they're decorrelated
    bayes_model = BayesianMarkovStateModel(lag_time=lag_time, n_samples=int(1e3), n_steps=1e4).fit(sequences)


ContinuousTimeMSM: 0.001 seconds
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-a3339a3f2814> in <module>()
      2 lag_time = 10
      3 with timing('ContinuousTimeMSM'):
----> 4     rates_model = ContinuousTimeMSM(lag_time=lag_time,).fit(sequences)
      5 with timing('BayesianMarkovStateModel'):
      6     # generating 1000 samples from the distribution, using 2e4 steps between samples

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/msm/ratematrix.pyc in fit(self, sequences, y)
    156 
    157     def fit(self, sequences, y=None):
--> 158         self._build_counts(sequences)
    159         return self._fit()
    160 

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/msm/ratematrix.pyc in _build_counts(self, sequences)
    132 
    133     def _build_counts(self, sequences):
--> 134         sequences = list_of_1d(sequences)
    135         lag_time = int(self.lag_time)
    136         if lag_time < 1:

/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/utils/validation.pyc in list_of_1d(y)
     18             raise ValueError(
     19                 "Bad input shape. Element %d has shape %s, but "
---> 20                 "should be 1D" % (i, str(value.shape)))
     21         result.append(value)
     22     return result

ValueError: Bad input shape. Element 0 has shape (101, 3114), but should be 1D

In [6]:
%matplotlib inline
from matplotlib.pyplot import *

plot(bayes_model.all_timescales_[:, 0])
xlabel('Iteration')
ylabel('Releaxation timescale')


/usr/local/lib/python2.7/dist-packages/msmbuilder-3.3.0.dev0-py2.7-linux-x86_64.egg/msmbuilder/msm/bayesmsm.py:339: RuntimeWarning: invalid value encountered in log
  timescales = - self.lag_time / np.log(us[:, 1:])
Out[6]:
<matplotlib.text.Text at 0x7faa8fc8d390>

In [ ]:
figure(figsize=(14,4))
grid(False)

# SUBPLOT 1. ContinuousTimeMSM
subplot(1,3,1, axisbg='white')
# the mean and 2-std error bars in the populations
rates_mean = rates_model.populations_
rates_err = 2*rates_model.uncertainty_pi()
print np.arange(n_states)
print len(rates_mean)
print len(rates_err)
# errorbar(x=np.arange(n_states), y=rates_mean, yerr=rates_err, color='b')
# fill_between(x=np.arange(n_states), y1=rates_mean+rates_err, y2=rates_mean-rates_err, color='b', alpha=0.2)
# title('MLE continuous-time MSM')
# xlabel('States')
# ylabel('Populations')

# SUBPLOT 2. ContinuousTimeMSM
subplot(1,3,2, axisbg='white')
# the mean and 2-std error bars in the populations, from
# averaging over the MCMC samples
bayes_mean = np.mean(bayes_model.all_populations_, axis=0)
bayes_err = 2*np.std(bayes_model.all_populations_, axis=0)
print len(bayes_mean)
print len(bayes_err)
print bayes_err
errorbar(x=np.arange(n_states), y=bayes_mean, yerr=bayes_err, c='r')
fill_between(x=np.arange(n_states), y1=bayes_mean+bayes_err, y2=bayes_mean-bayes_err, color='r', alpha=0.2)
title('Bayesian discrete-time MSM (MCMC)')
xlabel('States')
ylabel('Populations')

# SUBPLOT3. Potential energy
subplot(1,3,3, axisbg='white')
title('Potential energy surface')
xlabel('Position')
ylabel('Potential')
# the potential function
potential_x = np.linspace(-1, 1, 200)
V = quadwell.potential(potential_x)
plot(potential_x, V, 'k')

tight_layout()

In [ ]: