In [ ]:
get_ipython().magic('pylab inline')
matplotlib.rcParams['figure.dpi'] = 600
import seaborn as sns
import corner
import json
from io import StringIO
import numpy as np
from pandas import DataFrame, Index
from scipy.stats import gaussian_kde
from osprey.config import Config
from msmbuilder.msm import MarkovStateModel
import matplotlib.pyplot as pp
import msmexplorer as msme
from msmreport.utils import convert_keys_to_string
rs = np.random.RandomState(42)
In [ ]:
config = Config('{yaml}')
ss = config.search_space()
In [ ]:
db = config.trials()
cmd = 'select parameters from trials_v3 where mean_test_score = (select MAX(mean_test_score) from trials_v3);'
query = db.execute(cmd)
result = query.fetchall()
params = convert_keys_to_string(json.load(StringIO(result[0][0])))
DataFrame(np.array(list(params.values()))[argsort(list(params.keys()))], Index(sort(list(params.keys())))).T
In [ ]:
data, labels = config.dataset()
pipeline = config.estimator()
pipeline.set_params(**params)
In [ ]:
pipeline.fit(data)
In [ ]:
scaler = pipeline.steps[0][1]
tica = pipeline.steps[1][1]
tica_data = tica.transform(scaler.transform(data))
_ = msme.plot_histogram(np.concatenate(tica_data, axis=0), color='oxblood', quantiles=(0.5,),
labels=['$tIC%s$' % i for i in range(tica.n_components)],
show_titles=True)
In [ ]:
_ = msme.plot_voronoi(pipeline.steps[2][1], xlabel='tIC0', ylabel='tIC1', cluster_centers=False)
Plot top five timescales
In [ ]:
lt_range = ss.variables['tica__lag_time']
lag_times = np.linspace(lt_range.min, lt_range.max, 20).astype(int)
models = []
timescales = []
for lag_time in lag_times:
model = MarkovStateModel(verbose=False, n_timescales=5,
lag_time=lag_time, ergodic_cutoff=1)
models.append(model.fit(pipeline.steps[2][1].labels_))
timescales.append(model.timescales_)
timescales = np.array(timescales)
figsize(12, 6)
for timescale in timescales.T:
_ = plot(lag_times, timescale)
xlabel('Lag Time\n($ns$)', size=14, labelpad=10)
ylabel('Relaxation Time ($ns$)', size=14, labelpad=10)
yscale('log')
Plot "best" timescale
In [ ]:
tidx = argmax(np.product((np.diff(timescales, axis=0) / timescales[:-1]) < 0.01, axis=1))
lag_time = lag_times[tidx]
msm = models[tidx]
_ = msme.plot_timescales(msm, ylabel=r'Relaxation Time ($ns$)')
Plot residuals
In [ ]:
_ = msme.plot_pop_resids(msm, color='tarragon')
Plot free energy landscape
In [ ]:
assigns = msm.fit_transform(pipeline.steps[2][1].labels_)
pi_0 = msm.populations_[np.concatenate(assigns, axis=0)]
# Free Energy Surface
obs = (0, 1)
ax = msme.plot_free_energy(np.concatenate(tica_data, axis=0),
obs=obs, n_samples=10000,
pi=pi_0, gridsize=100, vmax=5.,
n_levels=8, cut=5, xlabel='tIC0',
ylabel='tIC1', random_state=rs)
# MSM Network
pp.scatter(*pipeline.steps[2][1].cluster_centers_[:, obs].T,
s = pipeline.steps[2][1].counts_ / 100.)