In [1]:
import salty
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [12]:
# Narrow state-variable windows bracketing the conditions planned for the MD
# simulation (101 kPa and 298 K).
T = [297, 316]
P = [99, 102]
properties = ['density', 'cpt']
devmodel = salty.aggregate_data(properties, T=T, P=P)
# NOTE(review): in the saved summary, the values under "Temperature range (K)"
# look like pressures and those under "Pressure range (kPa)" look like
# temperatures -- confirm the labelling.
devmodel.Data_summary


Out[12]:
0
Unique salts 98
Cations array(['CCCC[n+]1ccc(cc1)C', 'CCCCCCCCn1cc[n+]...
Anions array(['[B-](F)(F)(F)F', 'F[P-](F)(F)(F)(F)F',...
Total datapoints 4030
cpt 207.47 - 1667.0
density 876.3 - 1551.8
Temperature range (K) 100.0 - 101.325
Pressure range (kPa) 297.15 - 315.65

In [18]:
# Display the full aggregated frame; the saved output reports
# 4030 rows x 196 columns, so the notebook viewer truncates it.
devmodel.Data


Out[18]:
steiger-anion Marsili Partial Charges-anion BalabanJ-anion BertzCT-anion Ipc-anion HallKierAlpha-anion Kappa1-anion Kappa2-anion Kappa3-anion Chi0-anion ... VSA_EState10-cation Topliss fragments-cation Temperature, K Pressure, kPa Specific density, kg/m<SUP>3</SUP> Heat capacity at constant pressure, J/K/mol name-anion smiles-anion name-cation smiles-cation
0 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091026 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
1 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091908 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
2 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.092158 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
3 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091226 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
4 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091884 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
5 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091468 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
6 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.091243 5.940171 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
7 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.516900 0.190998 7.089827 5.945421 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
8 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.086889 0.190998 7.088409 5.948035 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
9 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 0.343121 0.190998 7.086989 5.950643 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
10 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 0.773131 0.190998 7.085567 5.955837 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
11 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 1.203142 0.190998 7.084226 5.958425 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
12 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 1.633152 0.190998 7.082800 5.961005 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
13 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 2.063162 0.190998 7.081456 5.966147 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
14 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.004882 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
15 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.004882 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
16 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.017865 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
17 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.017865 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
18 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.016610 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
19 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.016610 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
20 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007872 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
21 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007872 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
22 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007872 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
23 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007872 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
24 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
25 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
26 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
27 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
28 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210600 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
29 0.0 0.0 -0.898159 -0.885579 -0.193163 0.433964 -0.820105 -0.871227 -0.266849 -0.809217 ... 0.0 0.0 -0.946910 0.190998 7.007057 6.210198 tetrafluoroborate [B-](F)(F)(F)F 1-methyl-3-octylimidazolium CCCCCCCCn1cc[n+](c1)C
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4000 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.946910 0.190998 6.969415 6.453625 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4001 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.773131 0.190998 6.963285 6.475433 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4002 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.773131 0.190998 6.966967 6.475433 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4003 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.773131 0.190998 6.963190 6.475433 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4004 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.086889 0.190998 6.970542 6.464588 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4005 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.086889 0.190998 6.970167 6.464588 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4006 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.086889 0.190998 6.966307 6.464588 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4007 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.633152 0.190998 6.964514 6.484635 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4008 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.633152 0.190998 6.964136 6.484635 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4009 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.633152 0.190998 6.959968 6.484635 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4010 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.774906 0.190998 6.968756 6.456770 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4011 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.602902 0.190998 6.968192 6.458338 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4012 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.430898 0.190998 6.967532 6.459904 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4013 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 -0.258894 0.190998 6.966873 6.463029 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4014 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.085115 0.190998 6.965647 6.466145 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4015 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.257119 0.190998 6.964986 6.469250 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4016 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.429123 0.190998 6.964419 6.470800 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4017 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.601127 0.190998 6.963757 6.472346 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4018 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 0.945135 0.190998 6.962528 6.476972 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4019 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.117140 0.190998 6.961865 6.478510 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4020 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.289144 0.190998 6.961296 6.481577 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4021 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.461148 0.190998 6.960632 6.483107 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4022 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.805156 0.190998 6.959399 6.487684 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4023 0.0 0.0 -0.834889 0.267534 0.298575 0.746789 1.067328 4.177223 -0.267881 0.744431 ... 0.0 0.0 1.977160 0.190998 6.958734 6.489205 octyl sulfate CCCCCCCCOS(=O)(=O)[O-] 1-butyl-3-methylimidazolium CCCCn1cc[n+](c1)C
4024 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 -0.086889 0.190998 7.217150 5.693732 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C
4025 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 -0.086889 0.190998 7.218397 5.693732 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C
4026 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 -0.086889 0.190998 7.217201 5.693732 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C
4027 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 1.633152 0.190998 7.212516 5.710427 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C
4028 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 1.633152 0.190998 7.213621 5.710427 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C
4029 0.0 0.0 0.104718 -0.449648 -0.193163 0.746789 -0.800247 -0.833493 -0.264610 -0.809217 ... 0.0 0.0 1.633152 0.190998 7.212958 5.710427 hydrogen sulfate OS(=O)(=O)[O-] 1-ethyl-3-methylimidazolium CCn1cc[n+](c1)C

4030 rows × 196 columns


In [14]:
# Distinct anion SMILES strings present in the aggregated data.
devmodel.Data['smiles-anion'].unique()


Out[14]:
array(['[B-](F)(F)(F)F', 'F[P-](F)(F)(F)(F)F', 'N#C[B-](C#N)(C#N)C#N',
       'C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F', 'C(#N)[N-]C#N',
       'C(F)(F)(F)S(=O)(=O)[O-]', 'CS(=O)(=O)[O-]', 'COS(=O)(=O)[O-]',
       '[I-]', 'C(#N)[C-](C#N)C#N', 'C(#N)[S-]', 'CC(=O)[O-]', '[Cl-]',
       '[Br-]', 'CC(CC(C)(C)C)CP(=O)(CC(C)CC(C)(C)C)[O-]',
       'COP(=O)([O-])OC', 'CCCCC(=O)[O-]', 'CCCC(=O)[O-]', 'CCC(=O)[O-]',
       'CCOP(=O)([O-])OCC', 'C(=[N-])=NC#N', '[N+](=O)([O-])[O-]',
       'C(=O)(C(F)(F)F)[O-]', 'C(CS(=O)(=O)[O-])N',
       'C(CCN)C[C@@H](C(=O)[O-])N', 'C[C@H]([C@@H](C(=O)[O-])N)O',
       'C([C@@H](C(=O)[O-])N)S', 'CC(C)[C@@H](C(=O)[O-])N',
       'C(C(F)(F)[P-](C(C(F)(F)F)(F)F)(C(C(F)(F)F)(F)F)(F)(F)F)(F)(F)F',
       'FC(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F', 'C([C@@H](C(=O)[O-])N)O',
       'C1C[C@H](NC1)C(=O)[O-]', 'CCOS(=O)(=O)[O-]', 'C1=CC=C(C=C1)[O-]',
       'CCCCCCCCOS(=O)(=O)[O-]', 'OS(=O)(=O)[O-]'], dtype=object)

In [3]:
# Preview the first rows of the aggregated frame.
# NOTE(review): the saved output for this cell reports 195 columns and shows no
# heat-capacity column, whereas the full-frame output reports 196 columns, so
# it appears to come from an earlier run -- re-run to refresh.
devmodel.Data.head()


Out[3]:
steiger-anion Marsili Partial Charges-anion BalabanJ-anion BertzCT-anion Ipc-anion HallKierAlpha-anion Kappa1-anion Kappa2-anion Kappa3-anion Chi0-anion ... VSA_EState9-cation VSA_EState10-cation Topliss fragments-cation Temperature, K Pressure, kPa Specific density, kg/m<SUP>3</SUP> name-anion smiles-anion name-cation smiles-cation
0 0.0 0.0 -0.547513 -0.934312 -0.064 0.404019 -0.685285 -1.007495 -0.246189 -0.651482 ... 0.202674 0.000429 0.0 -0.811297 0.403571 7.091026 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
1 0.0 0.0 -0.547513 -0.934312 -0.064 0.404019 -0.685285 -1.007495 -0.246189 -0.651482 ... 0.202674 0.000429 0.0 -0.811297 0.403571 7.091908 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
2 0.0 0.0 -0.547513 -0.934312 -0.064 0.404019 -0.685285 -1.007495 -0.246189 -0.651482 ... 0.202674 0.000429 0.0 -0.811297 0.403571 7.092158 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
3 0.0 0.0 -0.547513 -0.934312 -0.064 0.404019 -0.685285 -1.007495 -0.246189 -0.651482 ... 0.202674 0.000429 0.0 -0.811297 0.403571 7.091226 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C
4 0.0 0.0 -0.547513 -0.934312 -0.064 0.404019 -0.685285 -1.007495 -0.246189 -0.651482 ... 0.202674 0.000429 0.0 -0.811297 0.403571 7.091884 tetrafluoroborate [B-](F)(F)(F)F 1-butyl-2-methylpyridinium CCCC[n+]1ccc(cc1)C

5 rows × 195 columns


In [4]:
from keras.layers import Dense, Dropout, Input
from keras.models import Model, Sequential
from keras.optimizers import Nadam
from keras.callbacks import EarlyStopping

# Stop training once the training loss has not improved for 50 consecutive
# epochs. The compiled loss is the mean squared error, so monitoring 'loss'
# tracks the same quantity as before, but 'loss' is always present in the
# training logs whereas the log key of the 'mse' metric depends on the Keras
# version.
early = EarlyStopping(monitor='loss', patience=50, verbose=1)

# Split the aggregated data into train/test arrays (80 % used for training).
X_train, Y_train, X_test, Y_test = salty.devmodel_to_array(
    devmodel, train_fraction=0.8)

# One output unit per target column. devmodel is aggregated over two
# properties, so Y may carry more than one column -- TODO confirm the shape
# returned by salty.devmodel_to_array. A 1-D target falls back to one unit.
n_outputs = Y_train.shape[1] if Y_train.ndim > 1 else 1

# Two hidden ReLU layers of 100 units, dropout, and a linear regression head.
# Only the first layer needs the input dimension; later layers infer it.
model = Sequential()
model.add(Dense(100, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_outputs, activation='linear'))
model.compile(optimizer="adam",
              loss="mean_squared_error",
              metrics=['mse'])

# The epoch count is only an upper bound; early stopping ends training.
model.fit(X_train, Y_train, epochs=10000, verbose=False, callbacks=[early])

# scores[0] is the loss, scores[1] the 'mse' metric on the held-out data.
scores = model.evaluate(X_test, Y_test)
# MSE is not a percentage, so report the raw value.
print("%s: %.4g" % (model.metrics_names[1], scores[1]))


/home/wesleybeckner/anaconda3/envs/py36/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
Epoch 00324: early stopping
3116/3116 [==============================] - 0s 41us/step
mean_squared_error: 0.01%

In [5]:
# Parity plot: model predictions against the held-out targets.
X=X_test
Y=Y_test
# Run the network once and reuse the result instead of predicting three times.
# Both arrays are exponentiated before plotting; presumably the targets are
# stored as natural logarithms -- TODO confirm against salty.
predicted = np.exp(model.predict(X))
actual = np.exp(Y)
with plt.style.context('seaborn-whitegrid'):
    fig=plt.figure(figsize=(5,5), dpi=300)
    ax=fig.add_subplot(111)
    # Common axis limits with a 2 % margin so the y = x line spans the data.
    minval = min(np.min(predicted), np.min(actual))
    maxval = max(np.max(predicted), np.max(actual))
    buffer = (maxval - minval) / 100 * 2
    minval = minval - buffer
    maxval = maxval + buffer
    # Reference line for perfect agreement.
    ax.plot([minval, maxval], [minval, maxval], linestyle="-",
            label=None, c="black", linewidth=1)
    ax.plot(actual, predicted,
            marker="*", linestyle="", alpha=0.4)
    # Heat capacity is reported in J/K/mol in the data's column header.
    ax.set_ylabel("Predicted $C_{pt}$ $(J/K/mol)$")
    ax.set_xlabel("Actual $C_{pt}$ $(J/K/mol)$")
    # NOTE(review): ax.grid() without arguments toggles grid visibility, so
    # under the whitegrid style this may switch the grid off -- confirm intent.
    ax.grid()
    plt.tight_layout()



In [6]:
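# NOTE(review): save_model is defined in a later cell, so that cell must run
# before this call is re-enabled; the name "viscosity" also looks stale.
#save_model("viscosity", model, devmodel, path="../salty/data/")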

In [7]:
#looks good, let's save this bad boi
def save_model(name, model, devmodel, path=None):
    """Write a trained model and its devmodel metadata to disk.

    Three files are written, each named from ``name``:
    ``<name>_qspr.h5`` via ``model.save``, ``<name>_summ.csv`` from
    ``devmodel.Data_summary`` and ``<name>_desc.csv`` from
    ``devmodel.Coef_data`` (the latter without its index).

    ``path``, when truthy, is prepended verbatim to each file name, so it
    must already end with a separator if it names a directory. When it is
    ``None`` or empty, the file names are used without any prefix.
    """
    prefix = path if path else ""
    stem = "{}{}".format(prefix, name)
    model.save("{}_qspr.h5".format(stem))
    devmodel.Data_summary.to_csv("{}_summ.csv".format(stem))
    devmodel.Coef_data.to_csv("{}_desc.csv".format(stem), index=False)

In [4]:
# pandas is imported here because this is the first cell, in document order,
# that uses it; without the import a fresh-kernel run raises NameError.
import pandas as pd

# Inspect the saved descriptor table for the density model.
pd.read_csv("../salty/data/density_desc.csv")


Out[4]:
steiger-anion Marsili Partial Charges-anion BalabanJ-anion BertzCT-anion Ipc-anion HallKierAlpha-anion Kappa1-anion Kappa2-anion Kappa3-anion Chi0-anion ... VSA_EState4-cation VSA_EState5-cation VSA_EState6-cation VSA_EState7-cation VSA_EState8-cation VSA_EState9-cation VSA_EState10-cation Topliss fragments-cation Temperature, K Pressure, kPa
0 777.0 777.0 3.733904 151.962372 984.463825 -0.374538 7.944294 2.275198 58769.601324 7.079242 ... 0.0 0.0 0.0 0.0 3.101044 16.233092 -0.004455 777.0 305.253099 101.168442
1 1.0 1.0 1.379774 149.342354 17048.352163 0.446280 4.809766 1.471130 230803.404002 4.174735 ... 1.0 1.0 1.0 1.0 2.251087 7.054571 0.240889 1.0 5.793918 0.377976

2 rows × 190 columns


In [3]:
import pandas as pd
# Inspect the saved summary for the density model. The file name matches the
# "<name>_summ.csv" pattern used by save_model; presumably it was written by
# that function, index included -- which would explain the "Unnamed: 0"
# column in the saved output.
pd.read_csv("../salty/data/density_summ.csv")


Out[3]:
Unnamed: 0 0
0 Unique salts 461
1 Cations array(['CCCC[n+]1ccc(cc1)C', 'CCCn1cc[n+](c1C)...
2 Anions array(['[B-](F)(F)(F)F', 'F[P-](F)(F)(F)(F)F',...
3 Total datapoints 5631
4 density 871.3 - 1709.32
5 Temperature range (K) 100.0 - 101.33
6 Pressure range (kPa) 297.05 - 315.91