In [2]:
from __future__ import print_function
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pyemma
import pandas as pd
# import some functions which should not clutter the notebook
# import shortcuts
import glob
# figure size parameters
pw = 6
ph = 0.75 * pw
In [146]:
In [165]:
a.query("xbias != 'step'").convert_objects(convert_numeric=True).mean()
Out[165]:
In [190]:
debug_folder = "/Users/weilu/Research/server/nov_2017/06nov/next_gen_native_based_memb_3_rg_0.4_lipid_0.6_extended/simulation/dis_60.0/"
dis_list = []
for i in range(12):
a = pd.read_table(debug_folder+f"x.{i}.colvars.traj", sep="\s+", skiprows=1, names=["num", "xbias", "xo"])
a = pd.to_numeric(a.query("xbias != 'step'")["xbias"])[1:]
b = pd.read_csv(debug_folder+f"1/addforce.{i}.dat")[" Distance"].values
# print(pd.Series(a-b).mean())
dis_list.append(pd.Series(a-b).mean())
print(np.mean(dis_list))
In [192]:
folder = "/Users/weilu/Research/server/nov_2017/06nov/next_gen_native_based_memb_3_rg_0.4_lipid_0.6_extended/simulation/"
all_folder = glob.glob(folder+"dis_*")
In [198]:
import re
numbers = re.compile(r'dis_(\d+)')
for sim in all_folder:
num = numbers.findall(sim)[0]
debug_folder = sim + "/"
dis_list = []
for i in range(12):
a = pd.read_table(debug_folder+f"x.{i}.colvars.traj", sep="\s+", skiprows=1, names=["num", "xbias", "xo"])
a = pd.to_numeric(a.query("xbias != 'step'")["xbias"])[1:]
b = pd.read_csv(debug_folder+f"1/addforce.{i}.dat")[" Distance"].values
# print(pd.Series(a-b).mean())
dis_list.append(pd.Series(a-b).mean())
print(num, np.mean(dis_list))
In [185]:
pd.Series(a-b).describe()
Out[185]:
In [184]:
b.shape
Out[184]:
In [175]:
pd.to_numeric(a.query("xbias != 'step'")["xbias"]).describe()
Out[175]:
In [176]:
pd.read_csv(debug_folder+"1/addforce.0.dat").describe()
Out[176]:
In [28]:
from pyemma.thermo import estimate_multi_temperature as estimate_mt
>>> import numpy as np
>>> energy_trajs = [np.array([1.6, 1.4, 1.0, 1.0, 1.2, 1.0, 1.0]), np.array([0.8, 0.7, 0.5, 0.6, 0.7, 0.8, 0.7])]
>>> temp_trajs = [np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), np.array([2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0])]
>>> dtrajs = [np.array([0, 1, 2, 2, 2, 2, 2]), np.array([0, 1, 2, 2, 1, 0, 1])]
>>> tram = estimate_mt(energy_trajs, temp_trajs, dtrajs, energy_unit='kT', temp_unit='kT', estimator='tram', lag=1)
>>> tram.f
Out[28]:
In [24]:
folder = "/Users/weilu/Research/davinci/nov_2017/13nov/all_data_folder/new_next_gen_native_based_memb_3_rg_0.4_lipid_0.6_extended/"
In [3]:
folder = "/Users/weilu/Research/davinci/nov_2017/20nov/all_data_folder/no_side_contraint_memb_3_rg_0.4_lipid_0.6_extended/"
In [4]:
folder_list = glob.glob(folder+"*.feather")
In [ ]:
In [55]:
folder_list = sorted(folder_list)
In [58]:
Out[58]:
In [59]:
import re
numbers = re.compile(r'dis(\d+).0.feather')
data_list = []
adw_us_trajs = []
adw_us_umbrella_centers = []
temp_trajs = []
energy_trajs = []
for sim in folder_list:
num = numbers.findall(sim)[0]
tmp = pd.read_feather(sim).query("Temp == 'T3'").assign(Bias=num).query("Step > 2e7")
if float(num) > 150:
continue
dic = {"T0":350, "T1":400, "T2":450, "T3":500, "T4":550, "T5":600, "T6":650, "T7":700, "T8":750, "T9":800, "T10":900, "T11":1000}
temps = list(dic.values())
def convert(x):
return dic[x]
tmp["Temp"] = tmp["Temp"].apply(convert)
temp_trajs.append(tmp["Temp"].values.reshape(-1,1))
energy_trajs.append(tmp["TotalE"].values.reshape(-1,1))
data_list.append(tmp)
print(num, np.mean(tmp["Distance"].values), float(num) - np.mean(tmp["Distance"].values))
adw_us_trajs.append(tmp["Distance"].values.reshape(-1,1))
adw_us_umbrella_centers.append(float(num))
data = pd.concat(data_list)
In [63]:
a = list(np.linspace(0.2,0.9,36))
In [ ]:
{:10.4f}
In [64]:
a[0]
Out[64]:
In [71]:
print(f"{a[0]:.1f}")
In [67]:
print("{}".format(a[0]))
In [61]:
np.linspace(0.8,1,2)
Out[61]:
In [44]:
from pyemma.thermo import estimate_multi_temperature as estimate_mt
>>> import numpy as np
>>> energy_trajs = [np.array([1.6, 1.4, 1.0, 1.0, 1.2, 1.0, 1.0]), np.array([0.8, 0.7, 0.5, 0.6, 0.7, 0.8, 0.7])]
>>> temp_trajs = [np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), np.array([2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0])]
>>> dtrajs = [np.array([0, 1, 2, 2, 2, 2, 2]), np.array([0, 1, 2, 2, 1, 0, 1])]
>>> tram = estimate_mt(energy_trajs, temp_trajs, dtrajs, energy_unit='kT', temp_unit='kT', estimator='tram', lag=1)
>>> tram.f
Out[44]:
In [45]:
np.mean(tmp["Distance"].values)
Out[45]:
In [46]:
adw_us_cluster = pyemma.coordinates.cluster_regspace(adw_us_trajs, max_centers=500, dmin=1)
In [47]:
n = len(adw_us_umbrella_centers)
adw_us_force_constants = [0.02]*n
In [48]:
adw_us_estimator = pyemma.thermo.estimate_umbrella_sampling(
adw_us_trajs, adw_us_cluster.dtrajs, adw_us_umbrella_centers, adw_us_force_constants,
maxiter=100000, maxerr=1.0E-13, save_convergence_info=50, estimator='wham')
In [49]:
pyemma.plots.plot_convergence_info(adw_us_estimator)
Out[49]:
In [50]:
plt.plot(adw_us_estimator.umbrella_centers[:, 0], adw_us_estimator.f_therm, 's', markersize=10, label=adw_us_estimator.name)
Out[50]:
In [18]:
tmp["Distance"].values.reshape(-1,1).shape
Out[18]:
In [42]:
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
data.hist("Distance", ax=ax, bins=500)
plt.show()
In [45]:
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
data.hist("Distance", ax=ax, bins=100)
plt.show()
In [43]:
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
data.groupby("Bias").hist("Distance", ax=ax, bins=50)
plt.show()
In [ ]:
In [37]:
import re
numbers = re.compile(r'dis(\d+).0.feather')
test = "dis102.0.feather"
numbers.findall(test)[0]
Out[37]:
The bias is computed via a harmonic potential based on the deviation of a frame from a reference structure. In the usual one-dimensional case, this reads
$$b^{(i)}(\mathbf{x}) = \frac{k^{(i)}}{2} \left\Vert \mathbf{x} - \mathbf{x}^{(i)} \right\Vert^2.$$In the more general case, though, one can use a non-symmetric force matrix:
$$b^{(i)}(\mathbf{x}) = \frac{1}{2} \left\langle \mathbf{x} - \mathbf{x}^{(i)} \middle\vert \mathbf{k}^{(i)} \middle\vert \mathbf{x} - \mathbf{x}^{(i)} \right\rangle.$$For these simulation types, the pyemma.thermo module provides the API functions
def estimate_umbrella_sampling(
us_trajs, us_dtrajs, us_centers, us_force_constants,
md_trajs=None, md_dtrajs=None, kT=None,
maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
estimator='wham', lag=1, dt_traj='1 step', init=None):
...
In [2]:
adw_x, adw_f, adw_pi = shortcuts.adw_reference(-1, 5, 100)
fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
ax[0].plot(adw_x, adw_pi, linewidth=3, color='black')
ax[0].set_ylabel(r"$\pi(x)$", fontsize=20)
ax[0].semilogy()
ax[1].plot(adw_x, adw_f, linewidth=3, color='black')
ax[1].set_ylabel(r"$f(x)$ / kT", fontsize=20)
for _ax in ax:
_ax.set_xlabel(r"$x$ / a.u.", fontsize=20)
_ax.tick_params(labelsize=15)
fig.tight_layout()
In [3]:
fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
# plot the thermodynamic ground/unbiased state (kT=1.0)
ax[0].plot(adw_x, adw_pi, linewidth=3, color='black', label='unbiased')
ax[1].plot(adw_x, adw_f, linewidth=3, color='black', label='unbiased')
# plot the sixth umbrella
_, adw_f2, adw_pi2 = shortcuts.adw_reference(adw_x[0], adw_x[-1], adw_x.shape[0], k_bias=30.0, x_bias=1.07894737)
ax[0].plot(adw_x, adw_pi2, linewidth=3, color='blue', label='umbrella 6')
ax[1].plot(adw_x, adw_f2, linewidth=3, color='blue', label='umbrella 6')
# plot the 10th umbrella
_, adw_f2, adw_pi2 = shortcuts.adw_reference(adw_x[0], adw_x[-1], adw_x.shape[0], k_bias=30.0, x_bias=2.13157895)
ax[0].plot(adw_x, adw_pi2, linewidth=3, color='green', label='umbrella 10')
ax[1].plot(adw_x, adw_f2, linewidth=3, color='green', label='umbrella 10')
# plot the 14th umbrella
_, adw_f2, adw_pi2 = shortcuts.adw_reference(adw_x[0], adw_x[-1], adw_x.shape[0], k_bias=30.0, x_bias=3.18421053)
ax[0].plot(adw_x, adw_pi2, linewidth=3, color='red', label='umbrella 14')
ax[1].plot(adw_x, adw_f2, linewidth=3, color='red', label='umbrella 14')
# finish the figure
ax[0].set_ylabel(r"$\pi^{(j)}(x)$", fontsize=20)
ax[0].semilogy()
ax[0].set_ylim([1.0E-10, 1.0])
ax[0].legend(loc=3, fontsize=12, fancybox=True, framealpha=0.5)
ax[1].set_ylabel(r"$f^{(j)}(x) - f^{(j)}$ / kT", fontsize=20)
ax[1].set_ylim([0.0, 30.0])
ax[1].legend(loc=2, fontsize=12, fancybox=True, framealpha=0.5)
for _ax in ax:
_ax.set_xlabel(r"$x$ / a.u.", fontsize=20)
_ax.tick_params(labelsize=15)
fig.tight_layout()
First step: import the data from 100 precomputed umbrella sampling trajectories as listed in the file meta.dat...
In [4]:
_adw_us_data_path = "data/asymmetric_double_well/umbrella_sampling/"
_adw_us_meta = np.loadtxt(_adw_us_data_path + "meta.dat")
adw_us_trajs = [np.load(_adw_us_data_path + "us_traj_%03d.npy" % int(i)) for i in _adw_us_meta[:, 0]]
adw_us_umbrella_centers = list(_adw_us_meta[:, 1])
adw_us_force_constants = list(_adw_us_meta[:, 2])
In [6]:
type(adw_us_trajs)
Out[6]:
In [7]:
len(adw_us_trajs)
Out[7]:
In [8]:
adw_us_trajs[0].size
Out[8]:
In [9]:
print(len(adw_us_trajs))
print(len(adw_us_umbrella_centers))
print(len(adw_us_force_constants))
In [5]:
plt.errorbar(np.arange(len(adw_us_umbrella_centers)), adw_us_umbrella_centers, yerr=1.0/np.array(adw_us_force_constants)**0.5, fmt='_')
plt.xlabel('trajectory number')
plt.ylabel('x')
Out[5]:
In [37]:
import glob
In [11]:
print('order parameter trajectory shape', adw_us_trajs[0].shape)
In [12]:
location = "/Users/weilu/Research/server/jul_2017/pulling/free_energy/k_0.1/simulation/dis_169.69696969696972/0/"
In [13]:
file = location + "data"
In [30]:
type(adw_us_trajs[0])
Out[30]:
In [15]:
my_umbrella_centers = [i for i in np.linspace(0, 400, 100)]
my_force_constants = [0.1]*100
In [121]:
f = "/Users/weilu/Research/server/jul_2017/pulling/free_energy/k_0.1/simulation/dis_"
In [125]:
umbrella_list = glob.glob("/Users/weilu/Research/server/jul_2017/pulling/free_energy/k_0.1/simulation/dis_*")
In [16]:
import re
numbers = re.compile(r'(\d+)')
def numericalSort(value):
parts = numbers.split(value)
parts[1::2] = map(int, parts[1::2])
return parts
In [17]:
umbrella_list = sorted(umbrella_list, key=numericalSort)
In [18]:
myTrajs = []
myEnergys = []
for location in umbrella_list:
file = location + "/0/data"
tmp = np.loadtxt(file)[:,0].reshape(600,1).copy()
myEnergys.append(tmp)
tmp = np.loadtxt(file)[:,1].reshape(600,1).copy()
myTrajs.append(tmp)
In [11]:
for t in myTrajs:
plt.hist(t)
plt.ylabel('counts')
plt.xlabel('x')
Out[11]:
In [159]:
myTrajs[0].flags
Out[159]:
In [157]:
myTrajs[0].copy().flags
Out[157]:
In [78]:
len(adw_us_umbrella_centers)
Out[78]:
In [19]:
myCluster = pyemma.coordinates.cluster_regspace(myTrajs, max_centers=500, dmin=10)
In [20]:
myEstimator = pyemma.thermo.estimate_umbrella_sampling(
myTrajs, myCluster.dtrajs, my_umbrella_centers, my_force_constants,
maxiter=1000000, maxerr=1.0E-15, save_convergence_info=50, estimator='wham')
In [166]:
pyemma.plots.plot_convergence_info(myEstimator)
Out[166]:
In [167]:
plt.plot(myEstimator.umbrella_centers[:, 0], myEstimator.f_therm, 's', markersize=10, label=adw_us_estimator.name)
plt.set_xlabel(r"umbrella_center", fontsize=20)
plt.set_ylabel(r"f_therm / kT", fontsize=20)
Plot 1-D histograms of the trajectories.
In [15]:
for t in adw_us_trajs:
plt.hist(t)
plt.ylabel('counts')
plt.xlabel('x')
Out[15]:
Second step: run a clustering algorithm on the configuration trajectories to define the bins (and to compute the bin counts later on).
In [107]:
adw_us_cluster = pyemma.coordinates.cluster_regspace(adw_us_trajs, max_centers=500, dmin=0.2)
In [108]:
?pyemma.coordinates.cluster_regspace
Third step: run WHAM estimations using the estimate_umbrella_sampling API function and plot the convergence info...
In [109]:
adw_us_estimator = pyemma.thermo.estimate_umbrella_sampling(
adw_us_trajs, adw_us_cluster.dtrajs, adw_us_umbrella_centers, adw_us_force_constants,
maxiter=100000, maxerr=1.0E-15, save_convergence_info=50, estimator='wham')
In [110]:
pyemma.plots.plot_convergence_info(adw_us_estimator)
Out[110]:
Fourth step: plot the free energies f and f_therm...
In [20]:
adw_us_x, adw_us_f = shortcuts.adw_match_reference_to_binning(adw_us_trajs, adw_us_cluster.clustercenters)
fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
ax[0].plot(
adw_us_cluster.clustercenters[adw_us_estimator.active_set, 0], adw_us_estimator.f, 's', markersize=10, label=adw_us_estimator.name)
ax[0].plot(adw_us_x, adw_us_f, '-*', linewidth=2, markersize=9, color='black', label='Reference')
ax[0].set_xlabel(r"configuration state", fontsize=20)
ax[0].set_ylabel(r"f / kT", fontsize=20)
ax[1].plot(adw_us_estimator.umbrella_centers[:, 0], adw_us_estimator.f_therm, 's', markersize=10, label=adw_us_estimator.name)
ax[1].set_xlabel(r"umbrella_center", fontsize=20)
ax[1].set_ylabel(r"f_therm / kT", fontsize=20)
for _ax in ax:
_ax.tick_params(labelsize=15)
_ax.set_ylim([0, 12])
_ax.legend(loc=4, fontsize=10, fancybox=True, framealpha=0.5)
fig.tight_layout()
In [21]:
# load unbiased data
adw_md_trajs = [np.load(_adw_us_data_path + "md_traj_%03d.npy" % i) for i in range(5)]
# redo clustering with both, biased and unbiased data
adw_us_cluster = pyemma.coordinates.cluster_regspace(adw_us_trajs + adw_md_trajs, max_centers=500, dmin=0.2)
# split dtrajs into biased and unbiased
adw_us_dtrajs = adw_us_cluster.dtrajs[:len(adw_us_trajs)]
adw_md_dtrajs = adw_us_cluster.dtrajs[len(adw_us_trajs):]
In [22]:
# plot order parameter trajectories of the unbiased simulations
for t in adw_md_trajs:
plt.plot(t)
plt.ylabel('x')
plt.xlabel('step')
Out[22]:
In [23]:
# run the estimator again for a sequence of lag times
lags = [1, 2, 5, 7, 10, 15, 20, 30, 40, 50, 70, 100]
memms = pyemma.thermo.estimate_umbrella_sampling(
adw_us_trajs, adw_us_dtrajs, adw_us_umbrella_centers, adw_us_force_constants,
md_trajs=adw_md_trajs, md_dtrajs=adw_md_dtrajs,
lag=lags,
maxiter=100000, maxerr=1.0E-15, save_convergence_info=50, estimator='dtram')
In [24]:
# TRAM
#lags = [1, 10, 50]
#memms = pyemma.thermo.estimate_umbrella_sampling(
# adw_us_trajs, adw_us_dtrajs, adw_us_umbrella_centers, adw_us_force_constants,
# md_trajs=adw_md_trajs, md_dtrajs=adw_md_dtrajs,
# lag=lags,
# maxiter=100000, maxerr=1.0E-6, init_maxerr=1.0, save_convergence_info=50, estimator='tram', direct_space=True)
In [25]:
[ m.name for m in memms ]
Out[25]:
In [26]:
# plot implied time scales depending on lag time
pyemma.plots.plot_memm_implied_timescales(memms)
Out[26]:
In [19]:
# at 10 steps the implied time scales look converged, pick that model for analysis
print(memms[4].lag)
dtram_estiamtor = memms[4]
In [20]:
# for TRAM
#print(memms[1].lag)
#dtram_estiamtor = memms[1]
In [21]:
# plot estimate of the stationary distribution
adw_us_x, adw_us_f = shortcuts.adw_match_reference_to_binning(adw_us_trajs, adw_us_cluster.clustercenters)
plt.figure(figsize=(2 * pw, ph))
plt.plot(
adw_us_cluster.clustercenters[dtram_estiamtor.active_set, 0], dtram_estiamtor.f, 's', markersize=10, label=dtram_estiamtor.name)
plt.plot(adw_us_x, adw_us_f, '-*', linewidth=2, markersize=9, color='black', label='Reference')
plt.xlabel(r"configuration state", fontsize=20)
plt.ylabel(r"f / kT", fontsize=20)
Out[21]:
The pyemma.thermo module provides the following API functions to perform dTRAM and WHAM estimations:
def dtram(
ttrajs, dtrajs, bias, lag,
maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
dt_traj='1 step', init=None):
...
def wham(
ttrajs, dtrajs, bias,
maxiter=100000, maxerr=1.0E-15, save_convergence_info=0,
dt_traj='1 step'):
...
ttrajs is a list of numpy.ndarray objects with shape=(T_i,), where T_i denotes the number of frames in trajectory i. The entries indicate in which thermodynamic state each frame was created.dtrajs is a list of numpy.ndarray objects with shape=(T_i,), where T_i denotes the number of frames in trajectory i. The entries indicate to which discrete configuration states each frame belongs.bias is a numpy.ndarray with shape=(K, N), where K is the number of thermodynamic states and N is the number of discrete configuration states. The elements are the dimensionless bias energies for all combinations of discrete configuration and thermodynamic states.lag is the lag time in steps at which transitions are counted.def tram(
ttrajs, dtrajs, bias, lag,
maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
dt_traj='1 step', init=None, direct_space=False):
...
def mbar(
ttrajs, dtrajs, bias,
maxiter=100000, maxerr=1.0E-15, save_convergence_info=0,
dt_traj='1 step', direct_space=False):
...
The bias parameter of bin-less estimators has a different formet than for binned estimators:
bias is a (numpy.ndarray(T, num_therm_states), or list of numpy.ndarray(T_i, num_therm_states)) – A single reduced bias energy trajectory or a list of reduced bias energy trajectories. For every simulation frame seen in trajectory i and time step t, btrajs[i][t, k] is the reduced bias energy of that frame evaluated in the k’th thermodynamic state (i.e. at the k’th umbrella/Hamiltonian/temperature)The parameter direct_space allows to optimize the calculation for speed.
direct_space is an optional boolean parameter that is false by default. – Whether to perform the self-consitent iteration with Boltzmann factors (direct space) or free energies (log-space). Calculations in direct space are faster. When analyzing data from multi-temperature simulations, direct-space is not recommended.To make the preparation of ttrajs and bias easier, we provide two further API functions to handle the preparation for certain types of simulations, i.e., multi-temperature and umbrella sampling with harmonic bias potentials.
In [ ]:
In [ ]:
In [ ]:
In [ ]: