Pyrrol Torsion Fitting


In [1]:
%matplotlib inline
import pymc
import seaborn
import numpy as np
import glob
import matplotlib as mpl
import matplotlib.pyplot as plt
from parmed.charmm import CharmmParameterSet
import torsionfit.TorsionFitModel as TorsionFitModel
import torsionfit.TorsionScanSet as TorsionScanSet
import torsionfit.utils as utils
import simtk.openmm as omm
  • Load charmm cgenff forcefield, pyrrol stream file from paramchem and pyrrol psf file into CharmmParameterSet
  • Load Gaussian output files for torsion scan
  • Parse torsion scan Gaussian output files with TorsionScanSet.read_scan_logfiles and extract optimized geometry

In [2]:
param = CharmmParameterSet('../charmm_ff/top_all36_cgenff.rtf', '../charmm_ff/par_all36_cgenff.prm')
stream = 'pyrrol.str'
structure = 'pyrrol.psf'
scan = glob.glob('torsion-scan/*.log')
pyrrol = TorsionScanSet.read_scan_logfile(scan, structure)
# extract only qm optimized structures
pyrrol_opt = pyrrol.extract_geom_opt()

In [3]:
# create pymc model
model = TorsionFitModel.TorsionFitModel(param, stream, pyrrol_opt)
mcmc = pymc.MCMC(model.pymc_parameters)

In [4]:
# import database
db = pymc.database.sqlite.load('phase.test.db.sqlite')
db2 = pymc.database.sqlite.load('pyrrol.database_3')

Plot of qm energy and initial mm energy before fit


In [19]:
plt.plot(pyrrol_opt.qm_energy), plt.plot(pyrrol_opt.mm_energy)


Out[19]:
([<matplotlib.lines.Line2D at 0x118f2a790>],
 [<matplotlib.lines.Line2D at 0x118f2aa10>])

Samples from the posterior of torsion energy profiles for Pyrrol


In [48]:
seaborn.tsplot(db.trace('mm_energy')[100:], err_style='unit_traces'), plt.plot(pyrrol_opt.qm_energy, color='r')


Out[48]:
(<matplotlib.axes._subplots.AxesSubplot at 0x132256250>,
 [<matplotlib.lines.Line2D at 0x132ccded0>])

In [70]:
seaborn.jointplot(db.trace('CG251O_NG3C51_CG321_CG321_2_K')[1000:], db.trace('CG251O_NG3C51_CG2R51_HGR52_6_K')[1000:], kind='kde')#.plot_joint(seaborn.kdeplot, zorder=1, n_levels=6))


Out[70]:
<seaborn.axisgrid.JointGrid at 0x12f04f310>

In [53]:
seaborn.jointplot(db.trace('CG251O_NG3C51_CG2R51_CG2R51_multiplicity_bitstring')[100:], db.trace('CG251O_NG3C51_CG2R51_HGR52_multiplicity_bitstring')[100:], kind='hex')


/Users/sternc1/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:213: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  n = np.zeros(bins, ntype)
/Users/sternc1/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:249: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)
/Users/sternc1/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:213: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  n = np.zeros(bins, ntype)
/Users/sternc1/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:249: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)
Out[53]:
<seaborn.axisgrid.JointGrid at 0x12978ed10>

The cell below plots a heat map showing probability that any given multiplicity term (x-axis) is active for a given torsion (y-axis)


In [55]:
mult_bitstring = []
for i in model.pymc_parameters.keys():
    if i.split('_')[-1] == 'bitstring':
        mult_bitstring.append(i)

bitmask = [1,2,3,4,6]
multiplicities = np.zeros((len(mult_bitstring), 10000,5))

for m, torsion in enumerate(mult_bitstring):
    for i,j in enumerate(db.trace('%s' % torsion)[100:]):
        for k,l in enumerate(bitmask):
            if 2**(l-1) & int(j):
                multiplicities[m][i][k] = 1

print m   
plot = plt.figure()
plt.matshow(multiplicities.sum(1), cmap='jet',  extent=[0, 5, 0, 20]), plt.colorbar()
print m   
print fig
plt.yticks([])
#for k in range(m):
#    plt.axhline(k, color='black', lw=1)


37
37
(<matplotlib.image.AxesImage object at 0x117d98d50>, <matplotlib.colorbar.Colorbar instance at 0x117d8c680>)
Out[55]:
([], <a list of 0 Text yticklabel objects>)
<matplotlib.figure.Figure at 0x120ecc0d0>

In [9]:
mult_bitstring = []
for i in model.pymc_parameters.keys():
    if i.split('_')[-1] == 'bitstring':
        mult_bitstring.append(i)

bitmask = [1,2,3,4,6]
multiplicities = np.zeros((len(mult_bitstring), 10000,5))

for m, torsion in enumerate(mult_bitstring):
    for i,j in enumerate(db2.trace('%s' % torsion)[100:]):
        for k,l in enumerate(bitmask):
            if 2**(l-1) & int(j):
                multiplicities[m][i][k] = 1
                
plt.matshow(multiplicities.sum(1), cmap='jet'), plt.colorbar()


Out[9]:
(<matplotlib.image.AxesImage at 0x14d3fa490>,
 <matplotlib.colorbar.Colorbar instance at 0x14d052fc8>)

In [62]:
#rmse = utils.RMSE(pyrrol_opt, db)
#log_sigma = plt.plot(db.trace('log_sigma')[:], label = 'log_sigma')
sigma_plot = plt.plot(np.exp(db.trace('log_sigma')[:]), label = 'sigma')
#rmse = plt.plot(rmse, label = 'rmse')
#deviance = plt.plot(db.trace('deviance')[100:], label = 'deviance')
plt.legend()


Out[62]:
<matplotlib.legend.Legend at 0x13d133d50>

In [51]:
error = (db.trace('mm_energy')[:,:] - np.tile(pyrrol_opt.qm_energy, [10000,1]))
rms_error = np.sqrt((error**2).mean(1))

In [63]:
plt.plot(rms_error[100:],'.', label='RMSE')


Out[63]:
[<matplotlib.lines.Line2D at 0x13ddbcf10>]

In [64]:
deviance = plt.plot(db.trace('deviance')[100:], label = 'deviance')



In [31]:
plt.plot(db.trace('mm_energy')[9000:,:].T, '.');



In [32]:
plt.plot(pyrrol_opt.qm_energy, '.')


Out[32]:
[<matplotlib.lines.Line2D at 0x1528b5f90>]

In [8]:
pyrrol_opt.to_dataframe()


Out[8]:
torsion scan_direction step_point_total QM_energy KJ/mol MM_energy KJ/mole delta KJ/mole
0 [2, 5, 6, 7] 0 [2, 1, 7] 0.0 kJ/mol 0.000343863682076 kJ/mol -0.000343863682076 kJ/mol
1 [2, 5, 6, 7] 0 [6, 2, 7] 1.90520169202 kJ/mol 2.59947733462 kJ/mol -0.694275642607 kJ/mol
2 [2, 5, 6, 7] 0 [5, 3, 7] 6.82636470697 kJ/mol 8.09602955797 kJ/mol -1.269664851 kJ/mol
3 [2, 5, 6, 7] 0 [9, 4, 7] 12.5025032718 kJ/mol 13.093642523 kJ/mol -0.591139251217 kJ/mol
4 [2, 5, 6, 7] 0 [6, 5, 7] 15.9201660403 kJ/mol 14.2661116646 kJ/mol 1.65405437572 kJ/mol
5 [2, 5, 6, 7] 0 [6, 6, 7] 14.3299796721 kJ/mol 11.5869163876 kJ/mol 2.74306328453 kJ/mol
6 [2, 5, 6, 7] 0 [6, 7, 7] 9.57705079764 kJ/mol 8.3769550108 kJ/mol 1.20009578684 kJ/mol
7 [2, 5, 6, 7] 1 [2, 1, 7] 0.0 kJ/mol 0.000343863682076 kJ/mol -0.000343863682076 kJ/mol
8 [2, 5, 6, 7] 1 [7, 2, 7] 2.01192037575 kJ/mol 1.66443540637 kJ/mol 0.347484969378 kJ/mol
9 [2, 5, 6, 7] 1 [9, 3, 7] 6.93811909936 kJ/mol 7.41896450498 kJ/mol -0.480845405625 kJ/mol
10 [2, 5, 6, 7] 1 [6, 4, 7] 12.0456847125 kJ/mol 14.3335284418 kJ/mol -2.28784372931 kJ/mol
11 [2, 5, 6, 7] 1 [5, 5, 7] 14.1173220776 kJ/mol 19.4638202203 kJ/mol -5.3464981427 kJ/mol
12 [2, 5, 6, 7] 1 [6, 6, 7] 12.0229426346 kJ/mol 15.1721579378 kJ/mol -3.14921530314 kJ/mol
13 [2, 5, 6, 7] 1 [5, 7, 7] 6.96373609942 kJ/mol 9.32436239529 kJ/mol -2.36062629586 kJ/mol
14 [3, 2, 5, 6] 0 [2, 1, 7] 0.0 kJ/mol 0.00145391481689 kJ/mol -0.00145391481689 kJ/mol
15 [3, 2, 5, 6] 0 [9, 2, 7] 0.619972357992 kJ/mol 1.05498510674 kJ/mol -0.435012748749 kJ/mol
16 [3, 2, 5, 6] 0 [4, 3, 7] 3.54583966092 kJ/mol 8.94899662684 kJ/mol -5.40315696592 kJ/mol
17 [3, 2, 5, 6] 0 [15, 4, 7] 6.53315940895 kJ/mol 30.0376532877 kJ/mol -23.5044938787 kJ/mol
18 [3, 2, 5, 6] 0 [5, 5, 7] 5.68578465027 kJ/mol 31.4521257182 kJ/mol -25.7663410679 kJ/mol
19 [3, 2, 5, 6] 0 [6, 6, 7] 5.24347197672 kJ/mol 30.2435560272 kJ/mol -25.0000840505 kJ/mol
20 [3, 2, 5, 6] 0 [5, 7, 7] 5.15902278072 kJ/mol 28.8921527483 kJ/mol -23.7331299676 kJ/mol
21 [3, 2, 5, 6] 1 [2, 1, 7] 0.0 kJ/mol 0.00145391481689 kJ/mol -0.00145391481689 kJ/mol
22 [3, 2, 5, 6] 1 [9, 2, 7] 0.535307870945 kJ/mol 8.44945116232 kJ/mol -7.91414329137 kJ/mol
23 [3, 2, 5, 6] 1 [5, 3, 7] 2.52524756361 kJ/mol 22.2297109959 kJ/mol -19.7044634323 kJ/mol
24 [3, 2, 5, 6] 1 [6, 4, 7] 7.17685578158 kJ/mol 38.4801909664 kJ/mol -31.3033351849 kJ/mol
25 [3, 2, 5, 6] 1 [8, 5, 7] 15.9729674635 kJ/mol 55.8500344041 kJ/mol -39.8770669406 kJ/mol
26 [3, 2, 5, 6] 1 [9, 6, 7] 24.2102494228 kJ/mol 65.9678877088 kJ/mol -41.757638286 kJ/mol
27 [3, 2, 5, 6] 1 [8, 7, 7] 30.0654654418 kJ/mol 65.4215929673 kJ/mol -35.3561275255 kJ/mol
28 [1, 2, 5, 6] 0 [2, 1, 7] 0.0 kJ/mol 0.000588033778186 kJ/mol -0.000588033778186 kJ/mol
29 [1, 2, 5, 6] 0 [8, 2, 7] 1.14114978968 kJ/mol 6.70937038696 kJ/mol -5.56822059728 kJ/mol
30 [1, 2, 5, 6] 0 [6, 3, 7] 3.56173443573 kJ/mol 14.2341703832 kJ/mol -10.6724359475 kJ/mol
31 [1, 2, 5, 6] 0 [5, 4, 7] 5.73483948573 kJ/mol 19.5658429753 kJ/mol -13.8310034896 kJ/mol
32 [1, 2, 5, 6] 0 [6, 5, 7] 6.79709038569 kJ/mol 23.559254461 kJ/mol -16.7621640753 kJ/mol
33 [1, 2, 5, 6] 0 [7, 6, 7] 6.3118166615 kJ/mol 26.2878268557 kJ/mol -19.9760101942 kJ/mol
34 [1, 2, 5, 6] 0 [7, 7, 7] 5.3443357962 kJ/mol 28.4984812526 kJ/mol -23.1541454564 kJ/mol
35 [1, 2, 5, 6] 1 [2, 1, 7] 0.0 kJ/mol 0.000588033778186 kJ/mol -0.000588033778186 kJ/mol
36 [1, 2, 5, 6] 1 [8, 2, 7] 1.69309282652 kJ/mol 5.90129487457 kJ/mol -4.20820204805 kJ/mol
37 [1, 2, 5, 6] 1 [6, 3, 7] 7.70157763804 kJ/mol 24.1279259979 kJ/mol -16.4263483598 kJ/mol
38 [1, 2, 5, 6] 1 [7, 4, 7] 17.4741493938 kJ/mol 48.122778014 kJ/mol -30.6486286201 kJ/mol
39 [1, 2, 5, 6] 1 [11, 5, 7] 26.9090740191 kJ/mol 51.8891708861 kJ/mol -24.9800968671 kJ/mol
40 [1, 2, 5, 6] 1 [9, 6, 7] 29.825696938 kJ/mol 47.2349399156 kJ/mol -17.4092429777 kJ/mol
41 [1, 2, 5, 6] 1 [7, 7, 7] 29.5742160804 kJ/mol 47.0450139117 kJ/mol -17.4707978313 kJ/mol
42 [8, 1, 2, 5] 0 [2, 1, 7] 0.0 kJ/mol 0.0 kJ/mol 0.0 kJ/mol
43 [8, 1, 2, 5] 0 [7, 2, 7] 1.03520037734 kJ/mol 5.67101994206 kJ/mol -4.63581956472 kJ/mol
44 [8, 1, 2, 5] 0 [6, 3, 7] 5.09211717348 kJ/mol 25.5264419728 kJ/mol -20.4343247994 kJ/mol
45 [8, 1, 2, 5] 0 [5, 4, 7] 12.4859599982 kJ/mol 55.5751756006 kJ/mol -43.0892156024 kJ/mol
46 [8, 1, 2, 5] 0 [4, 5, 7] 22.4316808301 kJ/mol 92.6448189343 kJ/mol -70.2131381042 kJ/mol
47 [8, 1, 2, 5] 0 [5, 6, 7] 34.5074616448 kJ/mol 131.732700565 kJ/mol -97.2252389205 kJ/mol
48 [8, 1, 2, 5] 0 [5, 7, 7] 48.9280053015 kJ/mol 168.722012178 kJ/mol -119.794006876 kJ/mol
49 [8, 1, 2, 5] 1 [2, 1, 7] 0.0 kJ/mol 0.0 kJ/mol 0.0 kJ/mol
50 [8, 1, 2, 5] 1 [9, 2, 7] 0.928127250867 kJ/mol 12.7986271639 kJ/mol -11.8704999131 kJ/mol
51 [8, 1, 2, 5] 1 [6, 3, 7] 4.67518783023 kJ/mol 37.2782681106 kJ/mol -32.6030802804 kJ/mol
52 [8, 1, 2, 5] 1 [7, 4, 7] 12.0959525284 kJ/mol 69.774142099 kJ/mol -57.6781895705 kJ/mol
53 [8, 1, 2, 5] 1 [13, 5, 7] 21.1580299495 kJ/mol 105.711004518 kJ/mol -84.5529745682 kJ/mol
54 [8, 1, 2, 5] 1 [6, 6, 7] 31.5106795963 kJ/mol 144.521703477 kJ/mol -113.011023881 kJ/mol
55 [8, 1, 2, 5] 1 [5, 7, 7] 43.5835776125 kJ/mol 181.31456124 kJ/mol -137.730983627 kJ/mol

In [14]:
plt.plot(pyrrol_opt.qm_energy, '.')


Out[14]:
[<matplotlib.lines.Line2D at 0x1189b6d50>]

In [ ]: