In [1]:
import numpy as np
import numpy.random as npr
npr.seed(0)
import matplotlib.pyplot as plt
%matplotlib inline

In [51]:
def skewed_gaussian_potential(x, eps=0.01):
    """Anisotropic quadratic potential: stiff along (x0 - x1), soft along (x0 + x1).

    Parameters
    ----------
    x : array-like of length 2
    eps : float, stiffness of the narrow direction (smaller -> narrower well)
    """
    narrow = (x[0] - x[1]) ** 2 / (2 * eps)
    wide = (x[0] + x[1]) ** 2 / 2
    return narrow + wide

# Offset of each basin center from the origin, and the known free energy
# difference between the two basins (in units of kT) used to weight them.
separation = 0.5
true_free_energy_difference = 1.0

def mixture(x):
    """Unnormalized two-basin density, split along the diagonal x1 == x0.

    Below the diagonal (x1 < x0): skewed Gaussian centered at
    (separation, -separation). Above it (x1 > x0): skewed Gaussian centered at
    (-separation, separation), up-weighted by exp(true_free_energy_difference).
    Exactly on the diagonal the density is 0.
    """
    below = x[1] < x[0]
    above = x[1] > x[0]
    shift = (separation, -separation)
    weight_below = np.exp(-skewed_gaussian_potential(x - shift))
    weight_above = np.exp(true_free_energy_difference - skewed_gaussian_potential(x + shift))
    return below * weight_below + above * weight_above

In [40]:
#x_ = np.random.rand(100000,2)*4 - 2
#plt.scatter(x_[:,0],x_[:,1],c=[mixture(point) for point in x_],s=2,linewidths=0,cmap='Blues')

# Evaluate the mixture density on a regular grid and draw a filled contour plot.
n_grid = 300
linspace = np.linspace(-2, 2, n_grid)
X = np.zeros((n_grid, n_grid))
for i in range(n_grid):
    for j in range(n_grid):
        # X[i, j] = density at (x1 = linspace[i], x2 = linspace[j])
        X[i, j] = mixture(np.array([linspace[i], linspace[j]]))

# BUG FIX: contourf(x, y, Z) reads Z as Z[y_index, x_index], but X is indexed
# [x1_index, x2_index] -- transpose so that x1 really is the horizontal axis
# (otherwise the two basins and their weights are shown at swapped positions).
plt.contourf(linspace, linspace, X.T, levels=np.linspace(np.min(X), np.max(X), 100), cmap='Blues')
plt.colorbar()
plt.xlim(-2, 2)
plt.ylim(-2, 2)
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.title('Equilibrium density')


Out[40]:
<matplotlib.text.Text at 0x126b4df90>

In [42]:
def metropolis_hastings(x0, q, n_samples=100, step_size=0.05):
    ndim = len(x0)
    q0 = q(x0)
    samples = np.zeros((n_samples,ndim))
    for i in range(n_samples):
        x1 = x0 + npr.randn(ndim)*step_size
        q1 = q(x1)
        if npr.rand() <= q1 / q0:
            samples[i] = x1
            x0 = x1
            q0 = q1
        else:
            samples[i] = x0
    
    return samples

In [43]:
npr.seed(0)
# Two 100k-step chains, one started in each basin, so both modes get visited.
x = metropolis_hastings(np.array((1,-1)),mixture,100000)
y = metropolis_hastings(np.array((-1,1)),mixture,100000)

In [44]:
plt.plot(x[:,0], x[:,1],linewidth=0.1, color='blue')
plt.figure()
plt.plot(y[:,0], y[:,1],linewidth=0.1, color='green')

plt.figure()
plt.plot(x[:,0], x[:,1],linewidth=0.1)
plt.plot(y[:,0], y[:,1],linewidth=0.1)
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by starting condition)')


Out[44]:
<matplotlib.text.Text at 0x12c255110>

In [45]:
# Build the pymbar input u_kln[k, l, n]: reduced potential of sample n
# (drawn from state k) evaluated in state l.
# NOTE(review): these entries are the *density* mixture(x), but MBAR expects
# reduced potential energies, i.e. -log(density) -- confirm this is intended.
# NOTE(review): rows u_kln[0] and u_kln[1] are identical, and u_kln[k][l] is
# filled per *trajectory* l rather than "samples from state k evaluated at
# state l" -- verify against the pymbar u_kln convention before trusting f_k.
u_kln = np.zeros((2, 2, len(x)))
u_kln[0][0] = np.array([mixture(x_) for x_ in x])
u_kln[0][1] = np.array([mixture(x_) for x_ in y])
u_kln[1][0] = np.array([mixture(x_) for x_ in x])
u_kln[1][1] = np.array([mixture(x_) for x_ in y])

# Number of samples drawn from each state.
N_k = np.array((len(x), len(y)))

In [46]:
# let's evaluate the free energy difference using MBAR
from pymbar import MBAR
mbar = MBAR(u_kln, N_k)

In [47]:
mbar.f_k


Out[47]:
array([ 0.        ,  0.90128278])

In [48]:
X = np.vstack((x,y))
X.shape


Out[48]:
(200000, 2)

In [49]:
import pyemma

In [141]:
#kmeans = pyemma.coordinates.cluster_mini_batch_kmeans([x,y],k=10,max_iter=10)
# Discretize by picking k frames at uniform time intervals as cluster centers.
# NOTE(review): the kmeans convergence log below is stale output from the
# commented-out run; the clustering object actually in use is `regtime`.
regtime = pyemma.coordinates.cluster_uniform_time([x,y], k=10)


08-09-16 09:07:13 pyemma.coordinates.clustering.kmeans.MiniBatchKmeansClustering[8] INFO     Algorithm did not reach convergence criterion of 1e-05 in 10 iterations. Consider increasing max_iter.

In [134]:
#dtrajs = [dtraj.flatten() for dtraj in kmeans.get_output()]

In [142]:
dtrajs = [dtraj.flatten() for dtraj in regtime.get_output()]

In [143]:
dtrajs[0]


Out[143]:
array([4, 4, 4, ..., 0, 0, 0], dtype=int32)

In [144]:
c = np.hstack(dtrajs)

In [145]:
len(X), len(c)


Out[145]:
(200000, 200000)

In [149]:
# Color samples by uniform-time cluster assignment and overlay the centers.
plt.scatter(X[:,0], X[:,1],linewidth=0,c=np.hstack(dtrajs),s=1,cmap='Spectral')
# BUG FIX: `kmeans` is only defined in a commented-out cell above, so
# referencing it raises NameError on Restart & Run All; the clustering object
# in use here is `regtime` -- take the centers from it instead.
plt.scatter(regtime.clustercenters[:,0], regtime.clustercenters[:,1])
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by regular-time clustering assignment)')


Out[149]:
<matplotlib.text.Text at 0x13b605b50>

In [150]:
msm = pyemma.msm.estimate_markov_model(dtrajs,1)

In [151]:
# Transition matrix heatmap: P[i, j] = probability of moving from state i to j.
plt.imshow(msm.P, interpolation='none', cmap='Blues')
plt.title('MSM transition matrix\n(Trace: {0:.3f})'.format(np.trace(msm.P)))
# BUG FIX: imshow puts the row index on the *vertical* axis, and rows of P are
# the source states -- so 'From' belongs on y and 'To' on x (was swapped).
plt.ylabel('From')
plt.xlabel('To')
plt.colorbar()

plt.figure()

# Zero entries of P are expected; silence the divide-by-zero warning from log(0)
# (those cells become -inf and render as the colormap's low end).
with np.errstate(divide='ignore'):
    logP = np.log(msm.P)
plt.imshow(logP, interpolation='none', cmap='Blues')
plt.title('log(MSM transition matrix)\n(Trace: {0:.3f})'.format(np.trace(msm.P)))
plt.ylabel('From')
plt.xlabel('To')
plt.colorbar()


/Users/joshuafass/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:9: RuntimeWarning: divide by zero encountered in log
Out[151]:
<matplotlib.colorbar.Colorbar at 0x13b2b6c90>

In [152]:
np.trace(msm.P)


Out[152]:
8.8492080919352425

In [153]:
msm.active_count_fraction, msm.active_state_fraction


Out[153]:
(1.0, 1.0)

In [154]:
# Implied timescales vs. lag time (top 5 processes), with Bayesian error bars.
lags = range(1,100)
its = pyemma.msm.its(dtrajs,lags,nits=5, errors='bayes')

In [155]:
pyemma.plots.plot_implied_timescales(its)
plt.title('Top 5 implied timescales')


Out[155]:
<matplotlib.text.Text at 0x1321f4ed0>

In [159]:
cktest = msm.cktest(2,mlags=100)

In [164]:
pyemma.plots.plot_cktest(cktest)


Out[164]:
(<matplotlib.figure.Figure at 0x13d8b6fd0>,
 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x126a93290>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x13d9fe350>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x13da70ad0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x13dade1d0>]], dtype=object))

In [158]:
# what if we did tICA?

tica = pyemma.coordinates.tica([x,y])

In [67]:
tica.timescales


Out[67]:
array([ 1292.68667956,   447.37288151])

In [68]:
tica.eigenvectors[0]


Out[68]:
array([-0.9979056 ,  1.07551866])

In [69]:
X_tica = np.vstack(tica.get_output())
X_tica.shape


Out[69]:
(200000, 2)

In [70]:
plt.scatter(X_tica[:,0], X_tica[:,1], c=c, linewidths=0)


Out[70]:
<matplotlib.collections.PathCollection at 0x12a01a790>

In [71]:
kmeans = pyemma.coordinates.cluster_mini_batch_kmeans(tica.get_output(),k=20,max_iter=1000)


07-09-16 12:53:56 pyemma.coordinates.clustering.kmeans.MiniBatchKmeansClustering[6] INFO     Cluster centers converged after 694 steps.

In [72]:
# NOTE(review): `dtrajs` and `msm` are rebound here, shadowing the earlier
# position-space clustering/MSM -- all later cells see the tICA-space versions.
dtrajs = [dtraj.flatten() for dtraj in kmeans.get_output()]
msm = pyemma.msm.estimate_markov_model(dtrajs,1)

In [73]:
msm.active_count_fraction, msm.active_state_fraction


Out[73]:
(0.5, 0.5)

In [74]:
np.trace(msm.P)


Out[74]:
9.1960415563239124

In [75]:
plt.scatter(X[:,0], X[:,1],linewidth=0,c=np.hstack(dtrajs),s=1,cmap='Spectral')
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by cluster w.r.t. kinetic-map (tICA) metric)')


Out[75]:
<matplotlib.text.Text at 0x1304d0bd0>

In [76]:
# what's the estimated relative free energy of the two macrostates / would we be able to identify two macrostates?

plt.plot(msm.timescales(),'.')


Out[76]:
[<matplotlib.lines.Line2D at 0x1306896d0>]

In [106]:
%%time
hmm = pyemma.msm.estimate_hidden_markov_model(dtrajs, 2, 1, maxit=1000)


CPU times: user 1.07 s, sys: 11.1 ms, total: 1.08 s
Wall time: 1.06 s

In [107]:
hmm.pi


Out[107]:
array([ 0.30915457,  0.69084543])

In [108]:
# Macrostate free energies from the HMM stationary distribution: f = -log(pi).
# sorted() gives pi ascending, so -log is descending; [::-1] flips it so the
# most probable macrostate (lowest f) comes first.
f_i = -np.log(sorted(hmm.stationary_distribution))[::-1]
# Shift so the lowest free energy defines the zero of the scale.
f_i -= f_i.min()
plt.figure()
plt.plot(f_i, '.')
plt.xlabel('Macrostate')
plt.ylabel(r'$\Delta G$ $(k_B T)$')
plt.title('Macrostate free energies')


Out[108]:
<matplotlib.text.Text at 0x138159090>

In [109]:
f_i


Out[109]:
array([ 0.        ,  0.80407472])

In [110]:
hmm.observation_probabilities


Out[110]:
array([[  1.56150695e-001,   1.35338811e-193,   9.61696182e-195,
          5.45121180e-194,   6.67286325e-194,   2.44843643e-001,
          7.62882267e-002,   4.41404462e-202,   8.81840095e-194,
          1.95828712e-194,   1.23669306e-193,   1.20921703e-193,
          1.81373801e-193,   8.57171087e-003,   2.87009201e-194,
          2.11058164e-001,   2.85271072e-002,   2.74103710e-193,
          2.74560453e-001,   9.28758667e-204],
       [  4.53150722e-236,   1.11320434e-001,   8.41941753e-002,
          4.42936777e-002,   5.44696435e-002,   6.83304555e-191,
          1.03337903e-236,   5.96372392e-002,   7.17239127e-002,
          1.59080888e-002,   1.01918884e-001,   9.86040956e-002,
          8.36296480e-002,   1.99369385e-238,   2.33410311e-002,
          4.50945787e-235,   1.06226169e-001,   1.18543488e-001,
          5.10760239e-004,   2.56787530e-002]])

In [111]:
# Color each sample by hidden-state membership: red channel = normalized
# emission probability under state 0, blue channel = under state 1.
# (The array is (mis)named `rbg`; kept as-is because the plotting cell below
# references that name.)
dtrajs_ = np.hstack(dtrajs)

# Vectorized replacement for the original 200k-iteration Python loop:
# fancy-index the per-microstate emission probabilities by the discrete
# trajectory, then normalize the two channels per sample.
p_0 = hmm.observation_probabilities[0][dtrajs_]
p_1 = hmm.observation_probabilities[1][dtrajs_]
total = p_0 + p_1
rbg = np.zeros((len(dtrajs_), 3))
rbg[:, 0] = p_0 / total
rbg[:, 2] = p_1 / total

In [ ]:


In [112]:
plt.scatter(X[:,0], X[:,1],linewidth=0,c=rbg,s=1)#,cmap='Spectral')
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.title('Simulated trajectories\n(Colored by HMM membership)')


Out[112]:
<matplotlib.text.Text at 0x13879a490>

In [113]:
trajs = [x,y]

In [130]:
npr.seed(100)
# Draw 10 representative (traj, frame) index pairs per hidden state,
# weighted by the emission probabilities.
sample_inds = hmm.sample_by_observation_probabilities(10)

In [131]:
# Map each (trajectory index i, frame index j) pair back to coordinates,
# giving one coordinate array per hidden state.
samples = [np.array([trajs[i][j] for (i,j) in state]) for state in sample_inds]

In [132]:
plt.scatter(samples[0][:,0], samples[0][:,1], linewidths=0, c='red')
plt.scatter(samples[1][:,0], samples[1][:,1], linewidths=0, c='blue')


Out[132]:
<matplotlib.collections.PathCollection at 0x131fccbd0>

So, in this case, we'd take samples from the red region and assume there's a free energy penalty of ~0.8 $k_B T$ to confine the system there relative to the other macrostate, when in