In [1]:
%matplotlib inline
import openpathsampling as paths
import numpy as np
import matplotlib.pyplot as plt
import os
import openpathsampling.visualize as ops_vis
from IPython.display import SVG

Analyzing the flexible path length simulation

Load the file, and from the file pull our the engine (which tells us what the timestep was) and the move scheme (which gives us a starting point for much of the analysis).


In [2]:
filename = "tps_nc_files/alanine_dipeptide_tps.nc"

In [3]:
# note that this log will overwrite the log from the previous notebook
#import logging.config
#logging.config.fileConfig("logging.conf", disable_existing_loggers=False)

In [4]:
%%time
flexible = paths.AnalysisStorage(filename)


CPU times: user 46.1 s, sys: 3.84 s, total: 50 s
Wall time: 1min 16s

In [5]:
engine = flexible.engines[0]
flex_scheme = flexible.schemes[0]

In [6]:
print "File size: {0} for {1} steps, {2} snapshots".format(
    flexible.file_size_str,
    len(flexible.steps),
    len(flexible.snapshots)
)


File size: 18.19GB for 10001 steps, 961458 snapshots

In [7]:
# rough estimate of total time
sum(step.change.details.timing for step in flexible.steps if step.change.details is not None)


Out[7]:
25802.1073791995

In [8]:
step = flexible.steps[1]
step.change.details


Out[8]:
<openpathsampling.pathmover.Details at 0x12eff5c30>

That tell us a little about the file we're dealing with. Now we'll start analyzing the contents of that file. We used a very simple move scheme (only shooting), so the main information that the move_summary gives us is the acceptance of the only kind of move in that scheme. See the MSTIS examples for more complicated move schemes, where you want to make sure that frequency at which the move runs is close to what was expected.


In [9]:
flex_scheme.move_summary(flexible.steps)


shooting ran 100.000% (expected 100.00%) of the cycles with acceptance 5639/10000 (56.39%)

Replica history tree and decorrelated trajectories

The ReplicaHistoryTree object gives us both the history tree (often called the "move tree") and the number of decorrelated trajectories.

A ReplicaHistoryTree is made for a certain set of Monte Carlo steps. First, we make a tree of only the first 50 steps in order to visualize it. (All 10000 steps would be unwieldy.)

After the visualization, we make a second ReplicaHistoryTree of all the steps, in order to count the number of decorrelated trajectories.


In [10]:
replica_history = ops_vis.ReplicaEvolution(replica=0)
tree = ops_vis.PathTree(
    flexible.steps[0:25],
    replica_history
)
tree.options.css['scale_x'] = 3

SVG(tree.svg())


Out[10]:
+BFFBBBBFFBBcorstep024578131415182324

In [27]:
# can write to svg file and open with programs that can read SVG
with open("flex_tps_tree.svg", 'w') as f:
    f.write(tree.svg())

In [12]:
tree.options.movers['default']['new'] = 'single'
tree.options.css['scale_x'] = 3
tree.options.css['horizontal_gap'] = 0.1  # True is the same as 0.05
SVG(tree.svg())


Out[12]:
+BFFBBBBFFBB{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}corstep024578131415182324

In [15]:
print "Decorrelated trajectories:", len(tree.generator.decorrelated_trajectories)


Decorrelated trajectories: 2

In [16]:
full_history = ops_vis.PathTree(
    flexible.steps,
    ops_vis.ReplicaEvolution(
        replica=0
    )
)

n_decorrelated = len(full_history.generator.decorrelated_trajectories)

print "All decorrelated trajectories:", n_decorrelated


All decorrelated trajectories: 893

Path length distribution

Flexible length TPS gives a distribution of path lengths. Here we calculate the length of every accepted trajectory, then histogram those lengths, and calculate the maximum and average path lengths.

We also use engine.snapshot_timestep to convert the count of frames to time, including correct units.


In [17]:
path_lengths = [len(step.active[0].trajectory) for step in flexible.steps]
plt.hist(path_lengths, bins=40, alpha=0.5);
print "Maximum:", max(path_lengths), "("+str(max(path_lengths)*engine.snapshot_timestep)+")"
print "Average:", "{0:.2f}".format(np.mean(path_lengths)), "("+(np.mean(path_lengths)*engine.snapshot_timestep).format("%.3f")+")"


Maximum: 505 (10.1 ps)
Average: 82.10 (1.642 ps)

Path density histogram

Next we will create a path density histogram. Calculating the histogram itself is quite easy: first we reload the collective variables we want to plot it in (we choose the phi and psi angles). Then we create the empty path density histogram, by telling it which CVs to use and how to make the histogram (bin sizes, etc). Finally, we build the histogram by giving it the list of active trajectories to histogram.


In [13]:
from openpathsampling.numerics import HistogramPlotter2D

In [19]:
psi = flexible.cvs['psi']
phi = flexible.cvs['phi']
deg = 180.0 / np.pi

In [20]:
path_density = paths.PathDensityHistogram(cvs=[phi, psi],
                                          left_bin_edges=(-180/deg,-180/deg),
                                          bin_widths=(2.0/deg,2.0/deg))

In [21]:
# TODO: can we pre-cache all the trajectories, too? That might make this faster....
flexible.trajectories.cache_all()

In [17]:
%%time
path_dens_counter = path_density.histogram([s.active[0].trajectory for s in flexible.steps])
# TODO: for the real thing, run over *all* steps -- just takes 10 times longer


CPU times: user 7min 7s, sys: 1.74 s, total: 7min 8s
Wall time: 7min 9s

Now we've built the path density histogram, and we want to visualize it. We have a convenient plot_2d_histogram function that works in this case, and takes the histogram, desired plot tick labels and limits, and additional matplotlib named arguments to plt.pcolormesh.


In [18]:
tick_labels = np.arange(-np.pi, np.pi+0.01, np.pi/4)
plotter = HistogramPlotter2D(path_density, 
                             xticklabels=tick_labels,
                             yticklabels=tick_labels, 
                             label_format="{:4.2f}")
ax = plotter.plot(cmap="Blues")



In [19]:
# this is the figure we actually publish
# change xlim and ylim (which are in radians) to get the figure you want
xlim = (-np.pi, 0)
ylim = (-np.pi/2, np.pi)
#xlim = ylim = (-np.pi, np.pi)

state_color = (0.953, 0.867, 0.878)
plt.rcParams.update({'font.size': 14})

# main plotting
ax = plotter.plot(xlim=xlim, ylim=ylim, cmap="Blues")
trajA = flexible.steps[3000].active[0].trajectory
trajB = flexible.steps[2000].active[0].trajectory
plotter.plot_trajectory(trajA, '-k', lw=0.5)
plotter.plot_trajectory(trajB, '-r', lw=0.5)
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")

# adding something to show the states
alpha_R_xywh = (-180, -100, 180, 100)

# our states are rectangular, so we make rectangular patches
from matplotlib.patches import Rectangle
def state_patch(x, y, w, h):
    xy = np.array([x, y]) / deg
    wh = np.array([w, h]) / deg
    plot_xy = [plotter.to_bins(val, i) 
               for (i, val) in enumerate(xy)]
    plot_w, plot_h = wh / plotter.histogram.bin_widths
    return Rectangle(plot_xy, plot_w, plot_h, color=state_color)

ax.axes.add_patch(state_patch(-180, -100, 180, 100))  # alpha_R
ax.axes.add_patch(state_patch(-180, 100, 180, 100))   # C7eq
ax.axes.add_patch(state_patch(-180, -260, 180, 100))  # C7eq, wrapped around

plt.text(x=plotter.to_bins(-100/deg, 0), 
         y=plotter.to_bins(-60/deg, 1),
         s="$\\alpha_R$")
plt.text(x=plotter.to_bins(-100/deg, 0),
         y=plotter.to_bins(130/deg, 1),
         s="$C_{7eq}$")

# now we're going to clean up so our axes are in degrees
# save limits
xlim = plt.xlim()
ylim = plt.ylim()

# convert labels back to degree
def degree_ticks(locs_labels):
    locs, labels = locs_labels
    new_labels = []
    for label in labels:
        numeric = float(label.get_text())
        label.set_text("{:.0f}".format(numeric*deg))
        new_labels.append(label)
    return locs, labels
        
xlocs, xlabels = degree_ticks(plt.xticks())
plt.xticks(xlocs, xlabels)
ylocs, ylabels = degree_ticks(plt.yticks())
plt.yticks(ylocs, ylabels)
plt.xlim(*xlim)
plt.ylim(*ylim);

plt.tight_layout()
plt.savefig("AD_tps_pathdensity.pdf")



In [25]:
#import nglview as nv
#nv.show_mdtraj(traj.to_mdtraj())

In [28]:
trajA.to_mdtraj()


Out[28]:
<mdtraj.Trajectory with 66 frames, 1651 atoms, 546 residues, and unitcells at 0x12fa0d910>

In [ ]: