In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# load data
import pymc
db = pymc.database.hdf5.load('300_adaptive_3gbmodel.h5')

# a trace holds an input parameter iff its name mentions a radius or scaling factor
def is_param(name):
    return '_radius' in name or '_scalingFactor' in name

# inputs: one row per MCMC sample, one column per sampled parameter
params = np.array([db._traces[s]() for s in db._traces if is_param(s)]).T
# outputs: simulated values from the model's dg_gbsa trace
targets = db.dg_gbsa()
print(params.shape,targets.shape)
# experimental data
dg_exp = db.dg_exp
# deviance is -2 * log_likelihood, so recover log_likelihood from it
log_likelihood = -0.5 * db.deviance()
In [4]:
# sanity check: how many MCMC samples were recorded for the log-likelihood
log_likelihood.shape
Out[4]:
In [5]:
# trace of the log-likelihood over MCMC iterations (first 10 samples dropped)
plt.plot(log_likelihood[10:])
Out[5]:
In [6]:
# how much linear structure is there in the inputs?
from sklearn.decomposition import PCA

pca_inputs = PCA().fit(params)

# cumulative explained variance: how many PCs capture most of the input variance?
plt.plot(np.cumsum(pca_inputs.explained_variance_ratio_))
plt.xlabel('PC #')
plt.ylabel('Cumulative explained variance')

# scree plot: variance captured by each individual component
plt.figure()
plt.plot(pca_inputs.explained_variance_ratio_)
plt.xlabel('PC #')
plt.ylabel('Explained variance')

print('How correlated are our samples? Can we use a small subset of these molecules for testing?')
In [7]:
# how much linear structure is there among our outputs?
# NOTE: this `pca` object is reused by the component-plot cell below
pca = PCA()
pca.fit(targets)
# cumulative explained variance across principal components of the outputs
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('PC #')
plt.ylabel('Cumulative explained variance')
plt.figure()
# scree plot: variance captured by each individual component
plt.plot(pca.explained_variance_ratio_)
plt.xlabel('PC #')
plt.ylabel('Explained variance')
print('How correlated are our outputs?')
In [130]:
# let's maybe try to select a subset of the 235 compounds that best summarizes the behavior of the rest
# perhaps we can do this by inspecting the leading PCs?
# loadings of the first output-PC: one entry per molecule
plt.plot(pca.components_[0])
Out[130]:
In [124]:
# hmm, let's instead use sparse PCA so that we can better interpret the components
from sklearn.decomposition import SparsePCA
# alpha is the sparsity penalty: larger alpha -> sparser component loadings
spca = SparsePCA(alpha=0.9)
spca.fit(targets)
Out[124]:
In [136]:
# loadings of the first two sparse components: each x-position indexes a
# molecule (a column of `targets`), so label the axis accordingly --
# the original 'PC #' label was incorrect here
plt.plot(spca.components_[0]);
plt.xlabel('Molecule #')
plt.figure()
plt.plot(spca.components_[1]);
plt.xlabel('Molecule #')
Out[136]:
In [29]:
# if we fit a PLS model between the inputs and outputs, how well do we do?
from sklearn.cross_decomposition import PLSRegression
# NOTE: PLSRegression defaults to n_components=2 -- a very low-rank model
pls = PLSRegression()
pls.fit(params,targets)
# R^2 on the training data (no held-out set here, so this is optimistic)
pls.score(params,targets)
Out[29]:
In [8]:
# what if we fit an unregularized linear model between our inputs and outputs?
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(params,targets)
# training-set R^2 -- optimistic, since the model is scored on the data it was fit to
lr.score(params,targets)
Out[8]:
In [9]:
# renaming to the conventional sklearn input/output names used below
X = params
y = targets
In [10]:
# let's do this again except with K-fold CV
K = 10
# sklearn.cross_validation was deprecated and later removed; KFold now lives in
# sklearn.model_selection and is configured per-split via .split(X).
# Materialize the splits as a list of (train_index, test_index) pairs so that
# `kfold` remains directly (and repeatedly) iterable by later cells.
from sklearn.model_selection import KFold
kfold = list(KFold(n_splits=K, shuffle=True, random_state=0).split(X))
train_scores = []
test_scores = []
mse_test = []
for train_index, test_index in kfold:
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    lr = LinearRegression()
    lr.fit(X_train,y_train)
    train_scores.append(lr.score(X_train,y_train))
    test_scores.append(lr.score(X_test,y_test))
    # squared error summed over all output molecules, averaged over test samples
    mse_test.append(np.sum((lr.predict(X_test)-y_test)**2) / len(y_test))
In [11]:
# summarize cross-validation performance as mean +/- std over the K folds
print('Train scores: {0:.3f} +/- {1:.3f}'.format(np.mean(train_scores),np.std(train_scores)))
print('Test scores: {0:.3f} +/- {1:.3f}'.format(np.mean(test_scores),np.std(test_scores)))
print('Test MSE: {0:.3f} +/- {1:.3f}'.format(np.mean(mse_test),np.std(mse_test)))
In [10]:
# hmm, that works surprisingly well -- maybe an even easier thing to do here
# would be to do bayesian linear regression (TODO: this thought was left unfinished)
In [104]:
# now let's see how much better we can do if we instead use a GP to interpolate
# NOTE(review): the old sklearn GaussianProcess class (used originally) was
# deprecated and removed, and raised a cryptic error on this data;
# GaussianProcessRegressor is its modern replacement
from sklearn.gaussian_process import GaussianProcessRegressor
gp = GaussianProcessRegressor()
gp.fit(X,y)
In [12]:
# using the Sheffield library
import GPy
In [8]:
# omg look at all these cool models to try
# candidate GPy model classes for later experiments (not used below yet)
coolmodels = [GPy.models.GPKroneckerGaussianRegression,
GPy.models.GPHeteroscedasticRegression,
GPy.models.SparseGPRegression,
GPy.models.SparseGPRegressionUncertainInput]
In [9]:
# but first things first: a plain GP regression baseline
# NOTE(review): X_train/y_train here are leftovers from the *last* CV fold of
# the earlier loop -- hidden notebook state; confirm this is intentional
gp = GPy.models.GPRegression(X_train,y_train)
gp.optimize()
In [10]:
# gp.predict returns (mean, variance); check the predictive mean matches y_test's shape
gp.predict(X_test)[0].shape,y_test.shape
Out[10]:
In [11]:
# K-fold CV for the GP, mirroring the linear-regression CV above
# (indentation of the loop body was lost in the notebook-to-text conversion)
mse_test = []
for train_index, test_index in kfold:
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
gp = GPy.models.GPRegression(X_train,y_train)
gp.optimize()
# gp.predict returns (mean, variance); score against the predictive mean only
mse_test.append(np.sum((gp.predict(X_test)[0]-y_test)**2) / len(y_test))
print('Test MSE: {0:.3f} +/- {1:.3f}'.format(np.mean(mse_test),np.std(mse_test)))
In [ ]:
# okay so this works about ~3x better for unseen data than linear regression does... wooorth iiit?
In [ ]:
# let's pick a better output metric, maybe just one difficult-to-predict target?
In [13]:
import GPyOpt
In [14]:
# for the sake of visualization, let's first look at the projection
# of X that's maximally informative about Molecule 1:
# fit a linear model to Molecule 1's outputs, then project X onto its coefficients
lr = LinearRegression()
lr.fit(X, y[:,0])
n_samples = len(y)
x_ = X.dot(lr.coef_).reshape((n_samples,1))
y_ = y[:,0].reshape((n_samples,1))
In [15]:
# use a serif font for publication-style figures
plt.rc('font', family='serif')
In [96]:
# scatter the projected inputs against Molecule 1's simulated delta G,
# with the experimental value overlaid as a dashed horizontal line
plt.scatter(x_,y_,linewidths=0,alpha=0.5,label='Samples')
plt.hlines(dg_exp[0],min(x_),max(x_),linestyles='--',
           label='Experimental')
plt.xlabel('Linreg_1(X)')
plt.ylabel(r'$\Delta G$ for Molecule 1')
# a single legend call suffices (the original called plt.legend twice)
plt.legend(loc='best')
Out[96]:
In [16]:
# 1-D GP through the projected data; pred_fun exposes just the predictive mean
gp = GPy.models.GPRegression(x_,y_)
gp.optimize()
pred_fun = lambda x:gp.predict(x)[0]
In [17]:
# GPy's built-in plot: posterior mean with confidence band and the data overlaid
gp.plot(data_symbol='k.')
i = 0
plt.title(r'Molecule {0}, Experimental $\Delta G$={1:.3f}'.format(i+1,dg_exp[i]))
plt.xlabel(r'Parameter set $\Theta$')
plt.ylabel(r'Simulated $\Delta G_{GBSA}$')
#plt.hlines(dg_exp[0],min(x_),max(x_))
#plt.ylim(dg_exp[0]-1.0,1)
Out[17]:
In [25]:
# same single-molecule GP visualization as above, but for molecule index 87
# NOTE(review): this cell duplicates the loop body below -- consider extracting a helper
i=87
y_ = y[:,i]
lr = LinearRegression()
lr.fit(X,y_)
x_ = X.dot(lr.coef_).reshape((len(y_),1))
y_ = y_.reshape((len(y_),1))
plt.figure()
gp = GPy.models.GPRegression(x_,y_)
gp.optimize()
gp.plot(data_symbol='k.')
plt.hlines(dg_exp[i],min(x_),max(x_),linestyles='--')
plt.title(r'Molecule {0}, Experimental $\Delta G$={1:.3f}'.format(i+1,dg_exp[i]))
plt.xlabel(r'Parameter set $\Theta$')
plt.ylabel(r'Simulated $\Delta G_{GBSA}$')
Out[25]:
In [71]:
# let's generate these plots for each of the molecules in our dataset
In [125]:
import os

# make sure the output directory exists before saving figures into it;
# the original savefig call fails if 'gp_plots/' is missing
if not os.path.isdir('gp_plots'):
    os.makedirs('gp_plots')

# one figure per molecule: project X onto the molecule's linear-regression
# coefficients, fit a 1-D GP to the projection, and save the plot to disk
for i in range(y.shape[1]):
    y_ = y[:,i]
    lr = LinearRegression()
    lr.fit(X,y_)
    x_ = X.dot(lr.coef_).reshape((len(y_),1))
    y_ = y_.reshape((len(y_),1))
    plt.figure()
    gp = GPy.models.GPRegression(x_,y_)
    gp.optimize()
    gp.plot(data_symbol='k.')
    plt.hlines(dg_exp[i],min(x_),max(x_),linestyles='--')
    plt.title(r'Molecule {0}, Experimental $\Delta G$={1:.3f}'.format(i+1,dg_exp[i]))
    plt.xlabel(r'Parameter set $\Theta$')
    plt.ylabel(r'Simulated $\Delta G_{GBSA}$')
    plt.savefig('gp_plots/molecule_{0}.jpg'.format(i),dpi=300)
    # close each figure to avoid accumulating hundreds of open figures in memory
    plt.close()
What is the function we want to optimize?
Practically, we want to find a single parameter set to use in simulations, the maximum a posteriori estimate $\text{argmax}_\theta p(\theta | \mathcal{D})$.
For uncertainty quantification, we also want to characterize the full posterior distribution $p(\theta | \mathcal{D})$.
In [23]:
# for many of the above examples, the experimentally observed delta_G is outside the range
# of simulated delta_Gs
# how many? and is the parameter->output relation easier to fit for those molecules?
# a molecule counts as "converged" if its experimental value lies within the simulated range
converged = (y.max(0)>=dg_exp)*(y.min(0)<=dg_exp)
converged_scores = []
unconverged_scores = []
for i, is_converged in enumerate(converged):
    y_ = y[:,i]
    lr = LinearRegression()
    lr.fit(X, y_)
    score = lr.score(X, y_)
    # route the score to the appropriate bucket
    bucket = converged_scores if is_converged else unconverged_scores
    bucket.append(score)
print('Linreg R^2 scores for "converged" molecules: {0:.3f} +/- {1:.3f}'.format(np.mean(converged_scores),np.std(converged_scores)))
print('Linreg R^2 scores for "unconverged" molecules: {0:.3f} +/- {1:.3f}'.format(np.mean(unconverged_scores),np.std(unconverged_scores)))
In [26]:
# fit a 1-D GP to the current projection and wrap its predictive mean for GPyOpt
gp = GPy.models.GPRegression(x_,y_)
gp.optimize()
pred_fun = lambda x:gp.predict(x)[0]
# NOTE(review): min/max over the 2-D x_ yield length-1 arrays, and GPyOpt
# typically expects bounds as a list of (lower, upper) pairs -- verify this works
bounds = (min(x_)-1.0,max(x_)+1.0)
# let's maybe render where the GP would suggest to sample next (thought left unfinished)
myBopt = GPyOpt.methods.BayesianOptimization(f=pred_fun, # function to optimize
bounds=bounds, # box-constraints of the problem
acquisition='EI', # Selects the Expected improvement
acquisition_par = 0)
In [ ]: