In [2]:
import sys
import json
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.base import clone
%matplotlib inline
import matplotlib.pyplot as pp

from hpmixtape import trial_to_fninval
sys.path.insert(0, '../fs-peptide-tpe/')
import experiment

def pipeline_from_trial(trial):
    spec = trial_to_fninval(trial, experiment.modelspace)
    return spec['_factory'](spec)
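
Here trial_to_fninval is assumed to return a dict of resolved hyperparameter values plus a '_factory' callable that turns that dict into a scikit-learn Pipeline; that is all pipeline_from_trial relies on. A minimal, hypothetical sketch of the same pattern (the key names and factory below are illustrative only, not hpmixtape's actual output):

from sklearn.pipeline import Pipeline
from sklearn.cluster import MiniBatchKMeans

def _example_factory(spec):
    # Build a Pipeline from the resolved hyperparameter values in spec.
    return Pipeline([('MiniBatchKMeans',
                      MiniBatchKMeans(n_clusters=spec['n_clusters']))])

example_spec = {'n_clusters': 100, '_factory': _example_factory}
example_pipeline = example_spec['_factory'](example_spec)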

In [3]:
from mixtape.datasets import fetch_fs_peptide

def load_trajectories():
    dataset = fetch_fs_peptide()
    trajectories = dataset['trajectories']
    return trajectories

trajectories = load_trajectories()


loading trajectory_1.xtc...
loading trajectory_10.xtc...
loading trajectory_11.xtc...
loading trajectory_12.xtc...
loading trajectory_13.xtc...
loading trajectory_14.xtc...
loading trajectory_15.xtc...
loading trajectory_16.xtc...
loading trajectory_17.xtc...
loading trajectory_18.xtc...
loading trajectory_19.xtc...
loading trajectory_2.xtc...
loading trajectory_20.xtc...
loading trajectory_21.xtc...
loading trajectory_22.xtc...
loading trajectory_23.xtc...
loading trajectory_24.xtc...
loading trajectory_25.xtc...
loading trajectory_26.xtc...
loading trajectory_27.xtc...
loading trajectory_28.xtc...
loading trajectory_3.xtc...
loading trajectory_4.xtc...
loading trajectory_5.xtc...
loading trajectory_6.xtc...
loading trajectory_7.xtc...
loading trajectory_8.xtc...
loading trajectory_9.xtc...
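
The dataset comes back as a list of 28 mdtraj Trajectory objects; the joined trajectory later in this notebook has 280,000 frames, i.e. roughly 10,000 frames per trajectory. A short inspection cell, added here for clarity rather than taken from the original run:

# Inspect what was loaded: number of trajectories and total frame count.
print(len(trajectories))                      # expected: 28
print(trajectories[0])                        # an mdtraj.Trajectory
print(sum(t.n_frames for t in trajectories))  # expected: 280000 frames in total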

In [17]:
from pprint import pprint
# Load the hyperparameter-search results and sort the trials by loss, best first;
# the 'trials' list is needed by the cell below.
trials = json.load(open('../fs-peptide-tpe/results-827f31e.json'))
trials = sorted(trials, key=lambda x: x['result'].get('loss', 0))
pprint([t['result'].get('parameters', None) for t in trials][:5])
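
An optional sanity check on the search results (an addition, not part of the original run; it relies only on the same 'result'/'loss' keys used in the sorting above):

# Count how many trials actually completed and reported a loss.
n_with_loss = sum(1 for t in trials if 'loss' in t['result'])
print('%d of %d trials reported a loss' % (n_with_loss, len(trials)))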

In [5]:
# Rebuild and fit the best pipeline from the hyperparameter search.
p = pipeline_from_trial(trials[0])
p.fit(trajectories)
# Re-run only the featurizer + tICA steps to get the tIC projection of every frame.
all_tics = clone(Pipeline(p.steps[0:2])).fit_transform(trajectories)
tics = np.concatenate(all_tics)
print(p)


Pipeline(steps=[('DihedralFeaturizer', DihedralFeaturizer(sincos=True, types=('phi', 'psi', 'chi1', 'chi2'))), ('tICA', tICA(gamma=0, lag_time=1, n_components=2, weighted_transform=True)), ('MiniBatchKMeans', MiniBatchKMeans(batch_size=10000, compute_labels=True, init='k-means++',
        init_size=None, max..., lag_time=1, n_timescales=3, prior_counts=0,
         reversible_type='transpose', verbose=False))])
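
Before plotting, it can help to inspect the fitted estimators directly. The sketch below uses the same named_steps access and populations_ attribute used later in this notebook; timescales_ is assumed to be exposed by the fitted MarkovStateModel (it was built with n_timescales=3):

# Peek at the fitted MSM: stationary distribution and slowest implied timescales.
msm = p.named_steps['MarkovStateModel']
print(msm.populations_.sum())  # should be ~1.0 for a stationary distribution
print(msm.timescales_)         # the 3 slowest implied timescales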

In [19]:
# 2D histogram (log color scale) of all frames projected onto the two leading tICs.
pp.hexbin(tics[:,0], tics[:,1], cmap='hot_r', bins='log')
pp.colorbar()
# Overlay the cluster centers, sized by the MSM's equilibrium populations.
centers = p.named_steps['MiniBatchKMeans'].cluster_centers_
sizes = p.named_steps['MarkovStateModel'].populations_ * 1e5
pp.scatter(centers[:, 0], centers[:, 1], c='none', s=sizes, alpha=0.3)
# Mark the first frame of each trajectory in black.
all_tics = np.array(all_tics)
pp.scatter(all_tics[:, 0, 0], all_tics[:, 0, 1], c='k')
pp.xlabel('tIC 1', fontsize=18)
pp.ylabel('tIC 2', fontsize=18)
pp.xlim(-400, 1100)
pp.ylim(-400, 600)
fig = pp.gcf()
# pp.savefig('/home/rmcgibbo/projects/papers/optimal-msms-presentation/pres/figures/best-fs-model-tics.png')



In [14]:
from mdtraj.html import enable_notebook
enable_notebook()



In [15]:
from mdtraj.html import TrajectoryHeatmap
t = trajectories[0].join(trajectories[1:])
t.center_coordinates()
t.superpose(t[0])


Out[15]:
<mdtraj.Trajectory with 280000 frames, 264 atoms, 23 residues, without unitcells at 0x7fa33dc4b790>
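
The joined trajectory is centered and superposed onto its first frame so that the structures shown by the heatmap widget are aligned. As a quick check of the alignment (an addition for illustration, using mdtraj's rmsd function):

import mdtraj as md
# Minimal RMSD (nm) of every frame to frame 0 of the joined trajectory.
rmsd_to_first = md.rmsd(t, t, 0)
print(rmsd_to_first.max())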

In [20]:
TrajectoryHeatmap(t, x=tics[:,0], y=tics[:, 1], fig=fig,
                  primaryStructure='lines')


[interactive widget output: a (tIC 1, tIC 2, frame index) tuple printed for each point selected on the heatmap]
