We want a 2D model system we can use for illustration, and also to weed out ideas that are definitely bad...
In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
def generate_egg_crate_landscape(well_frequency=6.0,
well_depth=5.0,
background_roughness_frequency=20.0,
background_depth=1.0,
):
"""Rough energy landscape, with ~ 16 big wells and a lot of little wells"""
def U(x):
background = background_depth * np.sum(np.sin(x * background_roughness_frequency))
wells = well_depth * np.sum(np.sin(x * well_frequency))
return background + wells + np.sum((x)**4)
def dU(x):
background = background_depth * background_roughness_frequency * (np.cos(x * background_roughness_frequency))
wells = well_depth * well_frequency * (np.cos(x * well_frequency))
return background + wells + (4 * (x**3))
return U, dU
In [3]:
np.random.seed(2)
U, dU = generate_egg_crate_landscape()
In [4]:
from numba import jit
In [5]:
@jit
def force(x):
return -dU(x)
In [6]:
def langevin_factory(force):
@jit
def simulate_langevin(x, v, stepsize=0.01, collision_rate=0.01, n_steps=1000, mass=1.0, temperature=1.0):
dim = len(x)
xs = np.zeros((n_steps, dim))
a = np.exp(-collision_rate * stepsize)
b = np.sqrt(1 - np.exp(-2 * collision_rate * stepsize))
sigma_O = np.sqrt(temperature / mass)
f = force(x)
maxwell_boltzmann = sigma_O * np.random.randn(n_steps, dim)
for i in range(n_steps):
# V
v += (stepsize / 2) * f / mass
# R
x += (stepsize / 2) * v
# O
v = a * v + b * maxwell_boltzmann[i]
# R
x += (stepsize / 2) * v
f = force(x)
# V
v += (stepsize / 2) * f / mass
xs[i] = x
return xs
return simulate_langevin
In [7]:
simulate_langevin = langevin_factory(force)
In [8]:
from itertools import product
maxima_1d = np.pi / 12 * np.array([5, 1, -3, -7]) + (np.pi / 6)
initial_conditions = np.array(list(product(maxima_1d, repeat=2)))
initial_conditions
Out[8]:
In [9]:
np.random.seed(0)
In [10]:
# run a few steps just to get it to JIT compile
_ = simulate_langevin(np.random.randn(2), np.random.randn(2), n_steps=10)
In [17]:
np.random.seed(10)
initial_x1 = np.random.randn(2)
initial_x2 = np.random.randn(2)
initial_v1 = np.random.randn(2)
initial_v2 = np.random.randn(2)
In [ ]:
In [25]:
np.random.seed(0)
xs1 = simulate_langevin(initial_x1, initial_v1, n_steps=10000)
In [26]:
np.random.seed(0)
xs2 = simulate_langevin(initial_x2, initial_v2, n_steps=10000)
In [28]:
plt.plot(np.linalg.norm(xs2 - xs1, axis=1))
Out[28]:
In [11]:
%%time
simulate_langevin(np.random.randn(2), np.random.randn(2), n_steps=10000)
Out[11]:
In [12]:
from tqdm import tqdm
n_clones = 50
overdamped_trajs = []
for i in tqdm(range(len(initial_conditions))):
x0 = np.array(initial_conditions[i])
for _ in range(n_clones):
xs = simulate_langevin(x0, np.random.randn(2), stepsize=0.05, collision_rate=np.inf, n_steps=10000)
overdamped_trajs.append(xs)
In [13]:
underdamped_trajs = []
for i in tqdm(range(len(initial_conditions))):
x0 = np.array(initial_conditions[i])
for _ in range(n_clones):
xs = simulate_langevin(x0, np.random.randn(2), stepsize=0.05, collision_rate=1.0, n_steps=10000)
underdamped_trajs.append(xs)
In [14]:
xs = overdamped_trajs[0]
plt.plot(xs[:,0], xs[:,1], linewidth=1, label='overdamped')
Out[14]:
In [15]:
xs = underdamped_trajs[0]
plt.plot(xs[:,0], xs[:,1], linewidth=1, label='underdamped')
xs = overdamped_trajs[0]
plt.plot(xs[:,0], xs[:,1], linewidth=1, label='overdamped')
plt.legend(loc='best')
Out[15]:
In [16]:
np.random.seed(1)
plt.figure(figsize=(10,10))
n_rows = n_cols = 3
for i in range(n_rows * n_cols):
ind = 50 * i
plt.subplot(n_rows, n_cols, i + 1)
xs = underdamped_trajs[ind]
plt.plot(xs[:,0], xs[:,1], linewidth=0.5, alpha=0.5, label='underdamped')
xs = overdamped_trajs[ind]
plt.plot(xs[:,0], xs[:,1], linewidth=0.5, alpha=0.5, label='overdamped')
plt.xlim(-2,2)
plt.ylim(-2,2)
plt.legend(loc='best')
plt.title('trajectory {}'.format(ind))
plt.xticks([-1,0,1])
plt.yticks([-1,0,1])
plt.tight_layout()
In [17]:
for xs in underdamped_trajs:
plt.plot(xs[:,0], xs[:,1])
In [18]:
for xs in overdamped_trajs:
plt.plot(xs[:,0], xs[:,1])
In [19]:
for xs in underdamped_trajs:
plt.plot(xs[:,0], xs[:,1], color='blue')
for xs in overdamped_trajs:
plt.plot(xs[:,0], xs[:,1], color='orange')
In [20]:
for xs in overdamped_trajs:
plt.plot(xs[:,0], xs[:,1])
width = 5
X = np.random.rand(10000, 2) * width - (0.5 * width)
Us = np.array([U(x) for x in X])
plt.scatter(X[:,0], X[:,1], c=Us, s=2, cmap='Blues')
plt.colorbar()
Out[20]:
In [21]:
for xs in underdamped_trajs:
plt.plot(xs[:,0], xs[:,1])
X = np.random.rand(10000, 2) * width - (0.5 * width)
Us = np.array([U(x) for x in X])
plt.scatter(X[:,0], X[:,1], c=Us, s=2, cmap='Blues')
plt.colorbar()
Out[21]:
In [22]:
from msmbuilder.cluster import RegularSpatial
clust = RegularSpatial(d_min=0.15)
clust.fit([xs[::10] for xs in overdamped_trajs])
Out[22]:
In [23]:
len(clust.cluster_centers_)
Out[23]:
In [24]:
X = np.vstack(overdamped_trajs)
plt.hexbin(X[:,0], X[:,1], bins='log', cmap='Blues')
plt.scatter(*clust.cluster_centers_.T, color='red')
Out[24]:
In [25]:
X = np.vstack(underdamped_trajs)
plt.hexbin(X[:,0], X[:,1], bins='log', cmap='Blues')
plt.scatter(*clust.cluster_centers_.T, color='red')
Out[25]:
In [26]:
underdamped_dtrajs = clust.predict(underdamped_trajs)
In [27]:
overdamped_dtrajs = clust.predict(overdamped_trajs)
In [28]:
import pyemma
overdamped_msm = pyemma.msm.estimate_markov_model(overdamped_dtrajs, lag=10)
overdamped_msm.active_count_fraction
Out[28]:
In [29]:
underdamped_msm = pyemma.msm.estimate_markov_model(underdamped_dtrajs, lag=10)
underdamped_msm.active_count_fraction
Out[29]:
In [30]:
plt.plot(underdamped_msm.timescales(), '.', label='underdamped')
plt.plot(overdamped_msm.timescales(), '.', label='overdamped')
plt.legend(loc='best')
Out[30]:
In [31]:
plt.plot(underdamped_msm.timescales(), '.', label='underdamped')
plt.plot(overdamped_msm.timescales(), '.', label='overdamped')
plt.legend(loc='best')
plt.yscale('log')
In [32]:
max(overdamped_msm.timescales()) / max(underdamped_msm.timescales())
Out[32]:
In [33]:
int(max(overdamped_msm.timescales())), int(max(underdamped_msm.timescales()))
Out[33]:
In [34]:
plt.imshow(overdamped_msm.P, cmap='Blues')
Out[34]:
In [35]:
plt.imshow(underdamped_msm.P, cmap='Blues')
Out[35]:
In [36]:
len(overdamped_dtrajs)
Out[36]:
In [37]:
from scipy.spatial.distance import cdist
def ground_truth_discretization(xs):
return np.argmin(cdist(xs, initial_conditions), 1)
In [38]:
overdamped_dtrajs_gt = [ground_truth_discretization(xs) for xs in overdamped_trajs]
underdamped_dtrajs_gt = [ground_truth_discretization(xs) for xs in underdamped_trajs]
In [ ]:
In [39]:
sets = [set(x) for x in overdamped_dtrajs_gt]
n_states = list(map(len, sets))
total_n_states = len(set.union(*sets))
plt.hist(n_states, bins=np.arange(1 + total_n_states))
plt.xlabel('# states visited in a single trajectory')
plt.ylabel('# trajectories')
plt.title('overdamped')
Out[39]:
In [40]:
# let's bootstrap over the trajectories and see how quickly we visit all states
cumulative_states_visited = []
n_bootstrapped_samples = 100
for _ in tqdm(range(n_bootstrapped_samples)):
inds = np.random.randint(0, len(sets), len(sets))
re_ordered_sets = [sets[i] for i in inds]
cumulative_states_visited.append(np.array([len(set.union(*re_ordered_sets[:i])) for i in range(1, len(sets))]))
In [41]:
for curve in cumulative_states_visited:
plt.plot(curve, color='grey', linewidth=0.5)
In [42]:
mean = np.mean(cumulative_states_visited, 0)
stdev = np.std(cumulative_states_visited, 0)
plt.plot(mean)
plt.fill_between(np.arange(len(mean)), mean - 2 * stdev, np.minimum(total_n_states, mean + 2 * stdev), alpha=0.5)
plt.hlines(total_n_states, 0, len(cumulative_states_visited[0]))
Out[42]:
In [43]:
plt.plot((np.array(cumulative_states_visited) >= 0.50 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.75 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.90 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.95 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.99 * total_n_states).mean(0))
plt.title('overdamped')
Out[43]:
In [44]:
sets = [set(x) for x in underdamped_dtrajs_gt]
n_states = list(map(len, sets))
total_n_states = len(set.union(*sets))
plt.hist(n_states, bins=np.arange(1 + total_n_states))
plt.xlabel('# states visited in a single trajectory')
plt.ylabel('# trajectories')
plt.title('underdamped')
Out[44]:
In [ ]:
In [45]:
# let's bootstrap over the trajectories and see how quickly we visit all states
cumulative_states_visited = []
n_bootstrapped_samples = 100
for _ in tqdm(range(n_bootstrapped_samples)):
inds = np.random.randint(0, len(sets), len(sets))
re_ordered_sets = [sets[i] for i in inds]
cumulative_states_visited.append(np.array([len(set.union(*re_ordered_sets[:i])) for i in range(1, len(sets))]))
In [46]:
for curve in cumulative_states_visited:
plt.plot(curve, color='grey', linewidth=0.5)
In [47]:
mean = np.mean(cumulative_states_visited, 0)
stdev = np.std(cumulative_states_visited, 0)
plt.plot(mean)
plt.fill_between(np.arange(len(mean)), mean - 2 * stdev, np.minimum(total_n_states, mean + 2 * stdev), alpha=0.5)
plt.hlines(total_n_states, 0, len(cumulative_states_visited[0]))
Out[47]:
In [48]:
plt.plot((np.array(cumulative_states_visited) >= 0.50 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.75 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.90 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.95 * total_n_states).mean(0))
plt.plot((np.array(cumulative_states_visited) >= 0.99 * total_n_states).mean(0))
plt.title('underdamped')
Out[48]:
In [49]:
overdamped_msm_gt = pyemma.msm.estimate_markov_model(overdamped_dtrajs_gt, lag=10)
overdamped_msm_gt.active_count_fraction
Out[49]:
In [50]:
underdamped_msm_gt = pyemma.msm.estimate_markov_model(underdamped_dtrajs_gt, lag=10)
underdamped_msm_gt.active_count_fraction
Out[50]:
In [51]:
plt.plot(underdamped_msm_gt.timescales(), '.', label='underdamped')
plt.plot(overdamped_msm_gt.timescales(), '.', label='overdamped')
plt.legend(loc='best')
plt.yscale('log')
In [52]:
max(overdamped_msm_gt.timescales()) / max(underdamped_msm_gt.timescales())
Out[52]:
In [53]:
int(max(overdamped_msm_gt.timescales())), int(max(underdamped_msm_gt.timescales()))
Out[53]:
In [54]:
plt.bar(np.arange(len(underdamped_msm_gt.pi)), underdamped_msm_gt.pi)
Out[54]:
In [55]:
plt.scatter(initial_conditions[:,0], initial_conditions[:,1])#, c=underdamped_msm_gt.pi)
Out[55]:
In [ ]:
# let's look at implied timescales as function of lagtime for overdamped and underdamped...
In [65]:
len(underdamped_dtrajs[0])
Out[65]:
In [74]:
lags = np.unique(np.array(np.round(np.logspace(0,4)), dtype=int))[:-1]
lags
Out[74]:
In [71]:
underdamped_its = pyemma.msm.its(underdamped_dtrajs, lags=lags, nits=10, show_progress=False)
In [ ]:
overdamped_its = pyemma.msm.its(overdamped_dtrajs, lags=lags, nits=10)
In [ ]:
In [ ]:
underdamped_its = pyemma.msm.its(underdamped_dtrajs, lags=np.arange(1,100), nits=10)
In [72]:
len(underdamped_dtrajs)
Out[72]:
In [78]:
from msmbuilder.msm import implied_timescales
underdamped_its = implied_timescales(underdamped_dtrajs, lag_times=lags)
overdamped_its = implied_timescales(overdamped_dtrajs, lag_times=lags)
In [80]:
overdamped_its.shape
Out[80]:
In [93]:
for i in range(overdamped_its.shape[1]):
plt.plot(lags, overdamped_its[:,i], color='orange')
for i in range(underdamped_its.shape[1]):
plt.plot(lags, underdamped_its[:,i], color='blue')
In [94]:
for i in range(overdamped_its.shape[1]):
plt.plot(lags, overdamped_its[:,i], color='orange')
plt.figure()
for i in range(underdamped_its.shape[1]):
plt.plot(lags, underdamped_its[:,i], color='blue')
In [29]:
plt.plot(lags, overdamped_its[:,0] / underdamped_its[:,0])
In [91]:
plt.plot(lags, underdamped_its[:,0])
plt.plot(lags, overdamped_its[:,0])
plt.yscale('log')
In [92]:
plt.plot(lags, underdamped_its[:,0])
plt.plot(lags, overdamped_its[:,0])
plt.yscale('log')
plt.xscale('log')
In [99]:
plt.plot(np.sum(overdamped_its, 1))
plt.plot(np.sum(underdamped_its, 1))
plt.yscale('log')
In [100]:
lags
Out[100]:
In [ ]: