In [1]:
import numpy as np
import numpy.random as npr
npr.seed(0)
import matplotlib.pyplot as plt
%matplotlib inline
In [51]:
def skewed_gaussian_potential(x, eps=0.01):
    """Anisotropic quadratic well: stiff along (x0 - x1), soft along (x0 + x1).

    Parameters
    ----------
    x : array-like, x[0] and x[1] are the two coordinates (arrays broadcast)
    eps : float, stiffness of the narrow direction (smaller => narrower well)
    """
    narrow = (x[0] - x[1]) ** 2 / (2 * eps)
    wide = (x[0] + x[1]) ** 2 / 2
    return narrow + wide

# Offset of each well from the origin along the anti-diagonal, and the known
# free-energy gap between the two wells (upper-left well weighted by exp(+1)).
separation = 0.5
true_free_energy_difference = 1.0

def mixture(x):
    """Unnormalized equilibrium density of a two-well mixture.

    The plane is split along the diagonal x1 == x0:
    - below it (x[1] < x[0]): a well centered at (+separation, -separation);
    - above it (x[1] > x[0]): a well centered at (-separation, +separation),
      boosted by exp(true_free_energy_difference).
    Exactly on the diagonal both indicators are False, so the density is 0.
    """
    below = (x[1] < x[0]) * np.exp(-skewed_gaussian_potential(x - (separation, -separation)))
    above = (x[1] > x[0]) * np.exp(true_free_energy_difference - skewed_gaussian_potential(x + (separation, -separation)))
    return below + above
In [40]:
# Evaluate the mixture density on an n_grid x n_grid lattice over [-2, 2]^2
# and draw a filled contour plot of it.
n_grid = 300
linspace = np.linspace(-2, 2, n_grid)
X = np.zeros((n_grid, n_grid))
for i in range(n_grid):
    for j in range(n_grid):
        # X[i, j] holds the density at (x1, x2) = (linspace[i], linspace[j])
        X[i, j] = mixture(np.array([linspace[i], linspace[j]]))
# Bug fix: contourf(x, y, Z) expects Z[row, col] = value at (x[col], y[row]),
# i.e. the FIRST axis of Z is the y-axis.  X's first axis is x1, which we
# label on the x-axis, so X must be transposed; otherwise the plot is
# flipped about the diagonal relative to its axis labels.
plt.contourf(linspace, linspace, X.T, levels=np.linspace(np.min(X), np.max(X), 100), cmap='Blues')
plt.colorbar()
plt.xlim(-2, 2)
plt.ylim(-2, 2)
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.title('Equilibrium density')
Out[40]:
In [42]:
def metropolis_hastings(x0, q, n_samples=100, step_size=0.05):
    """Random-walk Metropolis sampler targeting the unnormalized density q.

    Parameters
    ----------
    x0 : ndarray, shape (ndim,)
        Initial state (ideally with q(x0) > 0).
    q : callable
        Unnormalized target density; must return a value >= 0.
    n_samples : int
        Number of MCMC iterations (= rows of the returned array).
    step_size : float
        Standard deviation of the isotropic Gaussian proposal.

    Returns
    -------
    samples : ndarray, shape (n_samples, ndim)
        The chain, one state per iteration (rejected moves repeat the
        previous state).
    """
    ndim = len(x0)
    q0 = q(x0)
    samples = np.zeros((n_samples, ndim))
    for i in range(n_samples):
        # Symmetric Gaussian proposal, so the Hastings ratio reduces to q1/q0.
        x1 = x0 + npr.randn(ndim) * step_size
        q1 = q(x1)
        # Accept with probability min(1, q1/q0).  Bug fix: written as
        # r * q0 <= q1 rather than r <= q1/q0 to avoid a ZeroDivisionError
        # (or inf) when the chain sits at q0 == 0 — any proposal is then
        # accepted.  One uniform is drawn per iteration, preserving the
        # RNG consumption pattern of the original implementation.
        if npr.rand() * q0 <= q1:
            samples[i] = x1
            x0 = x1
            q0 = q1
        else:
            samples[i] = x0
    return samples
In [43]:
# Re-seed so the trajectories are reproducible, then run one long chain
# started in each basin: x starts in the lower-right well, y in the upper-left.
npr.seed(0)
x = metropolis_hastings(np.array((1,-1)),mixture,100000)
y = metropolis_hastings(np.array((-1,1)),mixture,100000)
In [44]:
# Figure 1: the chain started in the lower-right basin.
plt.plot(x[:,0], x[:,1],linewidth=0.1, color='blue')
plt.figure()
# Figure 2: the chain started in the upper-left basin.
plt.plot(y[:,0], y[:,1],linewidth=0.1, color='green')
plt.figure()
# Figure 3: both chains overlaid (colors come from the default cycle).
plt.plot(x[:,0], x[:,1],linewidth=0.1)
plt.plot(y[:,0], y[:,1],linewidth=0.1)
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by starting condition)')
Out[44]:
In [45]:
u_kln = np.zeros((2, 2, len(x)))
u_kln[0][0] = np.array([mixture(x_) for x_ in x])
u_kln[0][1] = np.array([mixture(x_) for x_ in y])
u_kln[1][0] = np.array([mixture(x_) for x_ in x])
u_kln[1][1] = np.array([mixture(x_) for x_ in y])
N_k = np.array((len(x), len(y)))
In [46]:
# let's evaluate the free energy difference using MBAR
# (pymbar is a third-party dependency; MBAR solves self-consistently for the
# relative dimensionless free energies f_k of the states described by u_kln.)
from pymbar import MBAR
mbar = MBAR(u_kln, N_k)
In [47]:
# Estimated dimensionless free energies of the two states
# (f_k[1] - f_k[0] is the free-energy difference in units of kT).
mbar.f_k
Out[47]:
In [48]:
# Concatenate the two trajectories into one (2 * n_samples, 2) array.
# NOTE(review): this rebinds `X`, shadowing the density grid from the
# contour-plot cell — consider a distinct name to avoid hidden-state bugs.
X = np.vstack((x,y))
X.shape
Out[48]:
In [49]:
import pyemma
In [141]:
#kmeans = pyemma.coordinates.cluster_mini_batch_kmeans([x,y],k=10,max_iter=10)
# Discretize state space by uniform-time clustering: pick k=10 cluster
# centers at regularly spaced frames along the trajectories.
regtime = pyemma.coordinates.cluster_uniform_time([x,y], k=10)
In [134]:
#dtrajs = [dtraj.flatten() for dtraj in kmeans.get_output()]
In [142]:
# Discrete trajectories: per-frame cluster assignments, one 1-D int array
# per input trajectory.
dtrajs = [dtraj.flatten() for dtraj in regtime.get_output()]
In [143]:
# Peek at the first discrete trajectory.
dtrajs[0]
Out[143]:
In [144]:
# Flatten the per-trajectory assignments into one long label vector,
# aligned with the stacked coordinate array X.
c = np.hstack(dtrajs)
In [145]:
# Sanity check: one cluster label per stacked frame.
len(X), len(c)
Out[145]:
In [149]:
# Scatter all frames, colored by their uniform-time cluster label, with the
# cluster centers overlaid.
plt.scatter(X[:,0], X[:,1],linewidth=0,c=np.hstack(dtrajs),s=1,cmap='Spectral')
# Bug fix: the original referenced `kmeans.clustercenters`, but the k-means
# call is commented out above, so this raised a NameError on a fresh run.
# The centers plotted here belong to the uniform-time clustering (`regtime`).
plt.scatter(regtime.clustercenters[:,0], regtime.clustercenters[:,1])
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by regular-time clustering assignment)')
Out[149]:
In [150]:
msm = pyemma.msm.estimate_markov_model(dtrajs,1)
In [151]:
plt.imshow(msm.P,interpolation='none',cmap='Blues')
plt.title('MSM transition matrix\n(Trace: {0:.3f})'.format(np.trace(msm.P)))
# P[i, j] is the probability of transitioning FROM state i TO state j, and
# imshow places the row index i on the vertical axis.  Bug fix: the original
# labels were swapped ('From' on x, 'To' on y).
plt.xlabel('To')
plt.ylabel('From')
plt.colorbar()
plt.figure()
# Log scale reveals the small off-diagonal transition probabilities
# (zero entries become -inf and render blank).
plt.imshow(np.log(msm.P),interpolation='none',cmap='Blues')
plt.title('log(MSM transition matrix)\n(Trace: {0:.3f})'.format(np.trace(msm.P)))
plt.xlabel('To')
plt.ylabel('From')
plt.colorbar()
Out[151]:
In [152]:
# Trace of the transition matrix: a value close to the number of states
# means the chain rarely leaves a cluster at this lag (very metastable).
np.trace(msm.P)
Out[152]:
In [153]:
# Fraction of observed counts / states included in the largest connected set.
msm.active_count_fraction, msm.active_state_fraction
Out[153]:
In [154]:
# Implied-timescale test: re-estimate the MSM over a range of lag times,
# with Bayesian error bars on the top 5 timescales.
lags = range(1,100)
its = pyemma.msm.its(dtrajs,lags,nits=5, errors='bayes')
In [155]:
# Timescales should level off at lags where the Markov assumption holds.
pyemma.plots.plot_implied_timescales(its)
plt.title('Top 5 implied timescales')
Out[155]:
In [159]:
# Chapman-Kolmogorov test on a 2-macrostate coarse-graining: compare the
# lag-1 model propagated to k*tau against MSMs re-estimated at lag k*tau.
cktest = msm.cktest(2,mlags=100)
In [164]:
pyemma.plots.plot_cktest(cktest)
Out[164]:
In [158]:
# what if we did tICA?
# Time-lagged independent component analysis on the raw 2-D coordinates.
# NOTE(review): no lag time is passed, so pyemma's default is used —
# confirm it is appropriate for this data.
tica = pyemma.coordinates.tica([x,y])
In [67]:
# tICA relaxation timescales, one per independent component.
tica.timescales
Out[67]:
In [68]:
# Linear combination of input coordinates defining the first tIC.
tica.eigenvectors[0]
Out[68]:
In [69]:
# Project both trajectories into tICA space and stack them.
X_tica = np.vstack(tica.get_output())
X_tica.shape
Out[69]:
In [70]:
# tICA projection colored by the earlier uniform-time cluster labels `c`.
plt.scatter(X_tica[:,0], X_tica[:,1], c=c, linewidths=0)
Out[70]:
In [71]:
# Re-cluster in tICA (kinetic-map) space with mini-batch k-means, k=20.
# NOTE(review): this rebinds `kmeans`, and the next cell rebinds `dtrajs`
# and `msm`, silently replacing the earlier uniform-time discretization and
# its MSM — a hidden-state hazard on partial re-runs.
kmeans = pyemma.coordinates.cluster_mini_batch_kmeans(tica.get_output(),k=20,max_iter=1000)
In [72]:
# Discrete trajectories and lag-1 MSM for the tICA-space discretization.
dtrajs = [dtraj.flatten() for dtraj in kmeans.get_output()]
msm = pyemma.msm.estimate_markov_model(dtrajs,1)
In [73]:
# Connectivity diagnostics for the new discretization.
msm.active_count_fraction, msm.active_state_fraction
Out[73]:
In [74]:
# Metastability of the new transition matrix.
np.trace(msm.P)
Out[74]:
In [75]:
# Scatter all frames colored by their tICA-metric cluster assignment.
plt.scatter(X[:,0], X[:,1],linewidth=0,c=np.hstack(dtrajs),s=1,cmap='Spectral')
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by cluster w.r.t. kinetic-map (tICA) metric)')
Out[75]:
In [76]:
# what's the estimated relative free energy of the two macrostates / would we be able to identify two macrostates?
# A large gap after the first implied timescale would indicate two
# well-separated metastable macrostates.
plt.plot(msm.timescales(),'.')
Out[76]:
In [106]:
%%time
# Hidden Markov model with 2 hidden (macro)states at lag 1, estimated from
# the tICA-space discrete trajectories.
hmm = pyemma.msm.estimate_hidden_markov_model(dtrajs, 2, 1, maxit=1000)
In [107]:
# Stationary distribution over the two hidden states.
hmm.pi
Out[107]:
In [108]:
# Macrostate free energies in units of kT: f = -log(pi), ordered so the most
# populated macrostate comes first, then shifted so min(f) = 0.
f_i = -np.log(sorted(hmm.stationary_distribution))[::-1]
f_i -= f_i.min()
plt.figure()
plt.plot(f_i, '.')
plt.xlabel('Macrostate')
plt.ylabel(r'$\Delta G$ $(k_B T)$')
plt.title('Macrostate free energies')
Out[108]:
In [109]:
f_i
Out[109]:
In [110]:
# Probability of observing each discrete microstate given each hidden state.
hmm.observation_probabilities
Out[110]:
In [111]:
# color by hidden state membership: red channel = state 0, blue channel = state 1
# (the name `rbg` is presumably a typo for `rgb`, but it is used by the
# scatter cell below, so it is kept for compatibility)
dtrajs_ = np.hstack(dtrajs)
rbg = np.zeros((len(dtrajs_), 3))
# Vectorized replacement for the original per-frame Python loop: fancy
# indexing looks up every frame's observation probability under both hidden
# states at once, then normalizes per frame — identical values, O(n) numpy
# instead of ~200k Python iterations.
p_0 = hmm.observation_probabilities[0][dtrajs_]
p_1 = hmm.observation_probabilities[1][dtrajs_]
total = p_0 + p_1
rbg[:, 0] = p_0 / total
rbg[:, 2] = p_1 / total
In [ ]:
In [112]:
# Scatter all frames, colored by fuzzy HMM membership (red = hidden state 0,
# blue = hidden state 1; ambiguous frames blend through purple).
plt.scatter(X[:,0], X[:,1],linewidth=0,c=rbg,s=1)#,cmap='Spectral')
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by HMM membership)')
Out[112]:
In [113]:
# Keep the raw trajectories in a list indexed the same way pyemma indexes
# its (trajectory, frame) sample pairs.
trajs = [x,y]
In [130]:
npr.seed(100)
# Draw 10 representative frames per hidden state, weighted by each state's
# observation probabilities.
sample_inds = hmm.sample_by_observation_probabilities(10)
In [131]:
# Map (trajectory index, frame index) pairs back to 2-D coordinates.
samples = [np.array([trajs[i][j] for (i,j) in state]) for state in sample_inds]
In [132]:
# Representative samples: red = hidden state 0, blue = hidden state 1.
plt.scatter(samples[0][:,0], samples[0][:,1], linewidths=0, c='red')
plt.scatter(samples[1][:,0], samples[1][:,1], linewidths=0, c='blue')
Out[132]:
So, in this case, we'd take samples from the red region and assume there's a free energy penalty of ~0.8 $k_B T$ to confine there relative to the blue region, when in