Imports and such


In [4]:
import collections
import errno
import sys, os, re, subprocess, glob
import time
import matplotlib.pyplot as plt
import MDAnalysis
import MDAnalysis.analysis
import MDAnalysis.analysis.rdf
import numpy as np
import pandas as pd
import six
from importlib import reload

import paratemp.coordinate_analysis as ca
import paratemp as pt
from paratemp.re_universe import REUniverse
import thtools
from thtools import cd, merge_two_dicts
from thtools import save_obj, load_obj, make_obj_dir

In [5]:
reload(ca)
reload(pt)
reload(thtools)


Out[5]:
<module 'thtools' from '/usr3/graduate/theavey/.local/lib/python3.6/site-packages/thtools/__init__.py'>

Parse distances from PLUMED input


In [ ]:
def parse_plumed_dists(p_plumed, verbose=True):
    """
    Read a plumed input and return dict of defined dists
    
    Note, this returns 1-based indexes for the atoms which is what
    MDAnalysis will need and what PLUMED/GROMACS use, but it is
    different than VMD (and Python, generally)."""
    with open(p_plumed, 'r') as f_plumed:
        init_lines = f_plumed.readlines()
    lines = []
    for line in init_lines:
        lines.append(line.split('#')[0])
    dists = dict()
    for line in lines:
        if 'DISTANCE' in line:
            m = re.search(r'(\S+):.+ATOMS=(\d+),(\d+)', line)
            if m:
                n, a1, a2 = m.groups()
                dists[n] = (int(a1), int(a2))
            else:
                n = re.search(r'(\S+):.+', line).group(1)
                if verbose:
                    print('Unable to define atoms '
                          'for distance: {}'.format(n))
    return dists

Create dicts of files, folders, and distances

Using parse_plumed_dists for a single set of simulations


In [ ]:
l_configs = ['MaEn', 
             'MaEx', 
             'MiEn', 
             'MiEx']
dp_configs = dict()
for c in l_configs:
    dp_configs[c] = os.path.abspath(
        os.path.join('PTAD-cinnamate/', c))

p_gro = os.path.abspath('PTAD-cinnamate/MaEn/tad-MaEn-solutes.gro')

d_temp = parse_plumed_dists('PTAD-cinnamate/MaEn/plumed-cin-ptad-MaEn.dat')
d_metad_dists = {'CV1': d_temp['dm1'], 'CV2': d_temp['dm2']}
del(d_temp)
d_ox_dists = {'O-O': [69, 71], 'O(l)-Cy': [69, 75], 'O(r)-Cy': [71, 75]}

For multiple simulations


In [ ]:
dp_catff = dict(phen_cg='CGenFF-3-body/PT/PTAD-cinnamate/',
                phen_ga='repeat-juanma-w-pt/tad-cinnamate/',
                naph_ga='repeat-juanma-w-pt/ntad-cinnamate/')
dp_l_configs = dict(MaEn='major-endo',
                    MaEx='major-exo',
                    MiEn='minor-endo',
                    MiEx='minor-exo')
dp_s_configs = dict(MaEn='MaEn',
                    MaEx='MaEx',
                    MiEn='MiEn',
                    MiEx='MiEx')
dd_configs = dict(phen_cg=dp_s_configs,
                  phen_ga=append_to_keys(dp_l_configs, '13-3htmf-etc/05'),
                  naph_ga=append_to_keys(dp_l_configs, '02-PT'))
dp_gros = dict(phen_cg='/projectnb/nonadmd/theavey/CGenFF-3-body/PT/PTAD-cinnamate/MaEn/tad-MaEn-solutes.gro',
               phen_ga=os.path.abspath('repeat-juanma-w-pt/tad-cinnamate/solutes.gro'),
               naph_ga=os.path.abspath('repeat-juanma-w-pt/ntad-cinnamate/ntad-cinnamate.gro'))
dd_cv_def = dict(naph_ga={'O-O': [53, 29], 'O(l)-dm': [53, 4], 'O(r)-dm': [29, 4], 'CV1':[129,53], 'CV2':[102,68]})

Create REUniverses and calculate some dists

Just import the Universes and read_data


In [ ]:
d_reus = dict()
for config in dp_configs:
    reu = REUniverse(p_gro, dp_configs[config], traj_glob='npt*xtc')
    d_reus[config] = reu
    for u in reu:
        u.read_data()

Import and calculate distances if necessary

Could also be done by read, calc, save because it now will not do any unnecessary steps


In [ ]:
d_reus = dict()

for catff in dp_catff:
    dp_configs = dd_configs[catff]
    top = dp_gros[catff]
    for config in dp_configs:
        key = f'{catff}_{config}'
        bf = os.path.join(dp_catff[catff], dp_configs[config])
        reu = REUniverse(top, bf, traj_glob='npt*.xtc')
        for u in reu:
            try:
                u.read_data()
            except OSError:
                d_cv_def = dd_cv_def[catff]
                u.calculate_distances(**d_cv_def)
                u.save_data()
        d_reus[key] = reu

Plotting

Simple 1D FESs for Universes in an REUniverse


In [ ]:
figs = []
for u in reu:
    fig = u.fes_1d('O-O', bins=15, linewidth=2)[3]
    figs.append(fig)

Simple 2D FESs for Universes in an REUniverse


In [ ]:
figs = []
for u in reu:
    fig, ax = u.fes_2d(x='CV1', y='CV2', 
                       xlabel='CV 1', ylabel='CV 2')[3:5]

Using ca.fes_array_3_legend


In [ ]:
x_lims = np.zeros([64, 2])
j = 0
for config in d_ptus:
    for i, u in enumerate(d_ptus[config]):
        u.figs = dict()
        u.read_data(ignore_no_data=True)
        fig, axes = ca.fes_array_3_legend(u.data, temp=u.temperature, 
                                          labels=('O-O', 'O(l)-Cy', 'O(r)-Cy'),
                                          bins=15, linewidth=2.0)[3:]
        ax = axes.flat[0]
        if ax.get_ylim()[1] > 10:
            for ax in axes.flat[:3]:
                ax.set_ylim((-0.5, 7))
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        fig.suptitle('{} {:.0f} K'.format(config, u.temperature))
        u.figs['fes_ox_dists_bins'] = fig
        x_lims[j] = ax.get_xlim()
        j += 1

Radial distributions

Calculate radial distributions


In [ ]:
name_gro = p_gro

name_gro = os.path.abspath(name_gro)

fig, ax = plt.subplots()

bins_CV_Os = {}
rdfs_CV_Os = {}

for key in sorted(dp_configs):
# for key in ['MiEx']:
    with cd(dp_configs[key]):
        i = 0
        print 'Now starting on {} {}...'.format(key, i)
        univ = d_reus[key][i]
        final_time = univ.final_time_str
        file_name_end = '-PT-phen-cg-{}-{}-{}.pdf'.format(key, i, final_time)
        
        reactant_CV_Os = univ.select_atoms('(resname is 3htmf) and (name is O1 or name is O2)')
        catalyst_CV_Os = univ.select_atoms('(resname is TAD or resname is tad) and (name is O1 or name is OH)')
        
        rcrdf = MDAnalysis.analysis.rdf.InterRDF(
            reactant_CV_Os, catalyst_CV_Os, range=(2.0, 12.0))
        rcrdf.run()
        
        print rcrdf.count
        
        bins_CV_Os[key] = rcrdf.bins
        rdfs_CV_Os[key] = rcrdf.rdf
        
        ax.plot(rcrdf.bins, rcrdf.rdf, label=key)

ax.legend()
fig

Make FES from radial distributions


In [ ]:
r = 0.0019872
temp = univ.temperature

g_CV_Os = {}
for key in rdfs_CV_Os:
    rdfs = rdfs_CV_Os[key]
    g_CV_Os[key] = - r * temp * np.log(rdfs + 1e-40)
min_g = min([min(gs) for gs in g_CV_Os.values()])
for key in g_CV_Os:
    g_CV_Os[key] = g_CV_Os[key] - min_g

fig, ax = plt.subplots()

for key in sorted(g_CV_Os):
    ax.plot(bins_CV_Os[key], g_CV_Os[key], label=key)
    ax.set_xlim([2.4,9.9])
    ax.set_ylim([-0.1,2.7])
ax.legend()
ax.set_ylabel(r'$\Delta G$ / (kcal / mol)')
ax.set_xlabel('distance / $\mathrm{\AA}$')

fig

Plot FES from two Universes on the same axes


In [ ]:
x_lims = np.zeros((64, 2))
j = 0
df_pvn_same = dict()
df_figs = df_pvn_same
for key in d_reus:
    if 'naph' not in key:
        continue
    config = key[-4:]
    reu = d_reus[key]
    equiv_ga_reu = d_reus['phen_ga_'+config]
    for i, u in enumerate(reu):
        fig, ax = plt.subplots(1, 1)
        df_figs[f'{config}_{i}'] = fig
        u.fes_1d('O-O', 
                 bins=15, 
                 ax=ax, linewidth=2, label='naphthyl')
        equiv_ga_reu[i].fes_1d('O-O', 
                               bins=15, 
                               ax=ax, linewidth=2, label='phenanthryl')
        ax.set_xlim((3.24, 5.85))
        ax.set_aspect(0.3, adjustable='box-forced')
        if ax.get_ylim()[1] > 10:
            ax.set_ylim((-0.5, 7))
        ax.legend()
        fig.tight_layout()
        x_lims[j] = ax.get_xlim()
        j += 1
#         break

Pull out certain frames from a trajectory and save to disk


In [ ]:
# Cutoff values used for frame selection
cv1_cuts = [6.5, 9.]
cv2_cuts = [1.5, 3.]
name_set = 'lCV1-sCV2'  # (partial) name for the file

# instantiate the Universe object
univ = ca.Taddol('solutes.gro', 'major-endo/13-3htmf-etc/05/pbc-MaEn-0.xtc')
# Calculate/read-in the distance data
try:
    univ.data['CV1']
except KeyError:
    univ.read_data(filename='major-endo/13-3htmf-etc/05/npt-PT-MaEn-out0.h5')

# Create boolean array telling where the cutoffs are satisfied
bool_array = ((univ.data['CV1'] > cv1_cuts[0]) & (univ.data['CV1'] < cv1_cuts[1]) 
              & (univ.data['CV2'] > cv2_cuts[0]) & (univ.data['CV2'] < cv2_cuts[1]))
num = len(univ.data[bool_array])
print('These cutoffs include {} frames.'.format(num))

# Create solute atomselection to not save the solvent to disk
solutes = univ.select_atoms('resname is 3HT or resname is CIN or resname is TAD')

# write the selected frames into a new trajectory file
with mda.Writer('minim-structs-'+name_set+'-rjm-PT-MaEn-0.xtc', 
                solutes.n_atoms) as W:
    for ts in univ.trajectory:
        if bool_array[univ.trajectory.frame]:
            W.write(solutes)