In [1]:
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
%matplotlib inline
import mdtraj
plt.rc('font', family='serif')
from msmbuilder.example_datasets import FsPeptide
dataset = FsPeptide().get()
fs_trajectories = dataset.trajectories
fs_t = fs_trajectories[0]
In [ ]:
In [486]:
print(dataset.DESCR)
In [489]:
from simtk.unit import *
(50 * picosecond * 1000) / nanosecond
Out[489]:
In [2]:
# idea: define basis functions as centered on conformations, represented in some vector space
In [9]:
from msmbuilder.featurizer import DihedralFeaturizer
dih_model=DihedralFeaturizer()
X = dih_model.fit_transform(fs_trajectories)
In [11]:
from msmbuilder.decomposition import tICA
tica = tICA(lag_time=100,weighted_transform=True)
In [12]:
X = tica.fit_transform(X)
In [14]:
X_ = np.vstack(X)
In [15]:
from sklearn.mixture import GMM
In [191]:
gmm=GMM(20)
In [192]:
gmm.fit(X_[::10])
Out[192]:
In [193]:
gmm.predict_proba(X_[:10]).shape
Out[193]:
In [194]:
basis_exp = gmm.predict_proba(X_)
In [150]:
gmm.means_.shape
Out[150]:
In [133]:
from scipy.stats import norm
np.sum(norm.logpdf(gmm.means_[0],loc=gmm.means_[0],scale=gmm.covars_[0]))
Out[133]:
In [134]:
def compute_basis_exp(points,gmm):
basis_exp = np.zeros((len(points),len(gmm.means_)))
for i in range(len(points)):
for j in range(len(gmm.means_)):
basis_exp[i,j] = np.sum(norm.logpdf(points[i],loc=gmm.means_[j],scale=gmm.covars_[j]))
return basis_exp
In [135]:
basis_exp = compute_basis_exp(X_,gmm)
In [136]:
def compute_S(basis_exp):
return basis_exp.T.dot(basis_exp) / len(basis_exp)
def compute_C(basis_exp,tau=10):
return basis_exp[tau:].T.dot(basis_exp[:-tau]) / len(basis_exp - tau)
In [195]:
S = compute_S(basis_exp)
C = compute_C(basis_exp,tau=1000)
In [196]:
dtrajs = [gmm.predict(x) for x in X]
In [197]:
from msmbuilder.msm import MarkovStateModel
msm = MarkovStateModel(lag_time=100)
msm.fit(dtrajs)
Out[197]:
In [198]:
plt.imshow(msm.transmat_-np.diag(np.ones(len(msm.transmat_))),interpolation='none',cmap='Blues')
Out[198]:
In [214]:
n_states = basis_exp.shape[1]
#A = np.diag(np.ones(n_states))
#A = np.random.randn(n_states,n_states)
#A = msm.transmat_
In [215]:
msm.countsmat_.shape
Out[215]:
In [216]:
P = A.T.dot(C).dot(A)
P.shape
Out[216]:
In [217]:
Q = A.T.dot(S).dot(A)
In [218]:
Q_inv = np.linalg.inv(Q)
In [219]:
gmrq = np.trace(P.dot(Q_inv))
gmrq
Out[219]:
In [220]:
gmrq_no_rates = gmrq
gmrq_no_rates
Out[220]:
In [221]:
gmrq-gmrq_no_rates
Out[221]:
In [222]:
msm.score_
Out[222]:
In [225]:
references = [x[::1000] for x in fs_trajectories]
In [229]:
refs = references[0]
for ref in references[1:]:
refs = refs + ref
In [326]:
basis_exps = []
for traj in fs_trajectories:
rmsd_to_refs = np.zeros((len(traj),len(refs)))
for i in range(len(refs)):
rmsd_to_refs[:,i] = mdtraj.rmsd(traj,refs,i)
basis_exps.append(rmsd_to_refs)
In [271]:
plt.plot(np.exp(-rmsd_to_refs/0.5)[:,20:][:,::10],alpha=0.2);
In [272]:
X = np.exp(-rmsd_to_refs/0.5)
#X = np.random.randn(10000,280)
from sklearn.decomposition import PCA
pca = PCA()
pca.fit(X)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
Out[272]:
In [274]:
from scipy import linalg as la
In [404]:
Cs = []
Ss = []
C_avg = np.zeros((len(X.T),len(X.T)))
S_avg = np.zeros((len(X.T),len(X.T)))
for traj in basis_exps:
X = np.exp(-traj/0.5)
C = compute_C(X,tau=100)
S = compute_S(X)
Cs.append(C)
Ss.append(S)
C_avg += C
S_avg += S
C_avg /= len(basis_exps)
S_avg /= len(basis_exps)
soln = la.eig(C_avg,S_avg)
In [701]:
from MDAnalysis.analysis.rms import rmsd as wRMSD
ref0 = refs.xyz[-1]
weights = np.ones(264)
def compute_rmsd_traj(trajectory,ref,weights):
traj = np.zeros(len(trajectory))
for i in range(len(trajectory)):
traj[i] = wRMSD(ref,trajectory.xyz[i],weights=weights,center='True')
return traj
In [702]:
raw_traj = compute_rmsd_traj(fs_t,ref0,weights)
In [716]:
#weights=np.zeros(264)
atomwise_deviations = np.load('fs_atomwise_deviations_tau=20.npy')
mean = atomwise_deviations.mean(0)
#weights = 1 - mean
weights = np.exp(-mean/0.065)
weighted_traj = compute_rmsd_traj(fs_t,ref0,weights)
inv_mean_weighted_traj = compute_rmsd_traj(fs_t,ref0,1/mean)
mean_weighted_traj = compute_rmsd_traj(fs_t,ref0,mean)
In [717]:
plt.plot(mean)
plt.figure()
plt.plot(1/mean)
plt.figure()
plt.plot(weights)
Out[717]:
In [718]:
plt.plot(raw_traj)
plt.plot(weighted_traj)
plt.plot(mean_weighted_traj)
plt.plot(inv_mean_weighted_traj)
Out[718]:
In [726]:
from statsmodels.tsa.stattools import acf
nlags=3000
autocorr = acf(raw_traj,nlags=nlags)**2
autocorr_w = acf(weighted_traj,nlags=nlags)**2
autocorr_mw = acf(mean_weighted_traj,nlags=nlags)**2
autocorr_imw = acf(inv_mean_weighted_traj,nlags=nlags)**2
plt.plot(autocorr,label='raw rmsd')
print(np.sum(autocorr_w)/np.sum(autocorr),np.sum(autocorr),np.sum(autocorr_w),np.sum(autocorr_mw),np.sum(autocorr_imw))
plt.plot(autocorr_w,label='exp(-rmsd)-weighted rmsd')
plt.plot(autocorr_mw,label='mean-weighted rmsd')
plt.plot(autocorr_imw,label='inv-mean-weighted rmsd')
plt.legend(loc='best')
plt.ylim(0,1)
Out[726]:
In [687]:
deviations = []
for i in range(len(fs_trajectories)):
deviations.append(atomwise_deviations[i*10000:(i+1)*10000])
In [694]:
autocorr = np.array([acf(dev.dot(weights),nlags=9000)**2 for dev in deviations]).T
Out[694]:
In [699]:
plt.plot(autocorr.mean(1)[:100]);
In [407]:
# discarding imaginary components
eigs = np.array(soln[0],dtype=float)
In [408]:
plt.plot(np.cumsum(eigs**2))
Out[408]:
In [409]:
plt.plot(sorted(eigs**2)[::-1],marker='o')
Out[409]:
In [410]:
np.sum(sorted(eigs)[::-1][:20])
Out[410]:
In [494]:
dtrajs_rmsd = [np.argmax(traj,1) for traj in basis_exps]
dtrajs_gauss_rmsd = [np.argmax(np.exp(-traj),1) for traj in basis_exps]
In [ ]:
In [ ]:
In [412]:
from pyemma import msm
m = msm.estimate_markov_model(dtrajs,lag=100)
In [413]:
m.fit(dtrajs)
In [414]:
plt.plot(m.eigenvalues())
plt.hlines(0,0,65)
Out[414]:
In [420]:
sum(sorted(m.eigenvalues())[::-1][:20])
Out[420]:
In [472]:
import pyemma
tica = pyemma.coordinates.tica(basis_exps,lag=100)
plt.plot(np.cumsum(tica.eigenvalues))
np.sum(tica.eigenvalues[:20])
Out[472]:
In [476]:
from msmbuilder.featurizer import DihedralFeaturizer
dih_model = DihedralFeaturizer(types=['phi', 'psi', 'omega', 'chi1', 'chi2', 'chi3', 'chi4'])
X = dih_model.fit_transform(fs_trajectories)
from msmbuilder.decomposition import tICA
tica = tICA(lag_time=100)
tica.fit(X)
plt.plot(np.cumsum(tica.eigenvalues_))
np.sum(tica.eigenvalues_[:20])
Out[476]:
In [499]:
from msmbuilder.cluster import MiniBatchKMedoids
kmed = MiniBatchKMedoids(20)
dtrajs = kmed.fit_transform(X)
In [500]:
m = pyemma.msm.estimate_markov_model(dtrajs,lag=100)
np.sum(m.eigenvalues())
Out[500]:
In [501]:
kmed = MiniBatchKMedoids(20)
tica = tICA(n_components=50,lag_time=100,weighted_transform=True)
dtrajs_tica = kmed.fit_transform(tica.fit_transform(X))
In [507]:
set(dtrajs[1]),set(dtrajs_tica[1])
Out[507]:
In [513]:
lags = range(1,50) + range(50,1000)[::50]
In [514]:
# kmed_raw
kmed_raw = pyemma.msm.its(dtrajs,lags,nits=1,errors='bayes')
# kmed_tica
kmed_tica = pyemma.msm.its(dtrajs_tica,lags,nits=1,errors='bayes')
# rmsd
rmsd = pyemma.msm.its(dtrajs_rmsd,lags,nits=1,errors='bayes')
# gauss_rmsd
gauss_rmsd = pyemma.msm.its(dtrajs_gauss_rmsd,lags,nits=1,errors='bayes')
In [508]:
its = kmed_raw
plt.plot(its.lags,its.get_timescales(),label='Kmed, raw dihedrals')
#plt.fill_between(its.lags,its.get_timescales() + its.
its = kmed_tica
plt.plot(kmed_tica.lags,its.get_timescales(),label='Kmed, tICA-transformed dihedrals')
its = rmsd
plt.plot(its.lags,its.get_timescales(),label='RMSD to references')
its = gauss_rmsd
plt.plot(its.lags,its.get_timescales(),label='np.exp(-RMSD) to references')
plt.yscale('log')
plt.legend(loc='best')
In [512]:
for i in range(4):
plt.plot(np.arange(10)+i,label=str(i))
plt.legend(loc='best')
Out[512]:
In [ ]:
In [490]:
nanosecond / (50* picosecond)
Out[490]:
In [462]:
#C_msm = m.transition_matrix
one_hot_trajs = []
n_states = np.max(np.vstack(dtrajs))+1
C_msm_avg = np.zeros((n_states,n_states))
S_msm_avg = np.zeros((n_states,n_states))
C_msms = []
S_msms = []
for traj in dtrajs:
one_hot_traj = np.zeros((len(traj),n_states))
for i in range(len(traj)):
one_hot_traj[i,traj[i]] = 1
one_hot_trajs.append(one_hot_traj)
C_msm = compute_C(one_hot_traj,tau=100)
S_msm = compute_S(one_hot_traj)
C_msms.append(C_msm)
C_msm_avg += C_msm
S_msms.append(S_msm)
S_msm_avg += S_msm
#C_msm_avg /= len(dtrajs)
#S_msm_avg /= len(dtrajs)
#S_msm = np.diag(m.stationary_distribution)
In [465]:
plt.imshow(np.log(C_msm_avg),interpolation='none')
plt.figure()
plt.imshow(np.log(S_msm_avg),interpolation='none')
Out[465]:
In [460]:
C_msm_avg.shape,S_msm_avg.shape
Out[460]:
In [454]:
msm_soln = la.eig(C_msm_avg,S_msm_avg)
In [455]:
msm_eigs = sorted(np.array(msm_soln[0],dtype=float))[::-1]
msm_eigs[:20]
Out[455]:
In [447]:
sum(msm_eigs[:20])
Out[447]:
In [448]:
plt.imshow(C_msm,interpolation='none')
Out[448]:
In [449]:
msm_soln[0].shape
Out[449]:
In [ ]: