notebook.community

Edit and run



In [1]:

    
import pickle

MAX_ERROR = 1000

predictions_svr = pickle.load(open('validation_set_predictions_svr.pickle', 'rb'))
predictions_rf = pickle.load(open('validation_set_predictions_rf.pickle', 'rb'))
predictions_svr.shape, predictions_rf.shape









    Out[1]:





((516, 2), (516, 2))



In [2]:

    
import numpy as np

def convert_to_conc(column):
    return column.apply(lambda x : np.power(10, -1 * x) * 1.0e9)

predictions_svr['ic50_predicted_nM'] = convert_to_conc(predictions_svr.ic50_predicted)
predictions_svr['ic50_true_nM']= convert_to_conc(predictions_svr.ic50_true)
predictions_rf['ic50_predicted_nM'] = convert_to_conc(predictions_rf.ic50_predicted)
predictions_rf['ic50_true_nM']= convert_to_conc(predictions_rf.ic50_true)



In [3]:

    
print(predictions_rf.shape)
predictions_rf.index = predictions_svr.index
predictions_rf.head()









    



(516, 4)






    Out[3]:






  
    
      
      ic50_predicted
      ic50_true
      ic50_predicted_nM
      ic50_true_nM
    
    
      smiles
      
      
      
      
    
  
  
    
      CCc1c(O)ccc2c1O[C@H](c1ccc(OCCN3CCCCC3)cc1)[C@H](c1ccc(O)cc1)S2
      7.952289
      8.327902
      11.161194
      4.70
    
    
      COc1c(Br)c(O)c(Br)c2nc(-c3ccc(O)c(F)c3)oc12
      5.705610
      6.252588
      1969.654174
      559.00
    
    
      Oc1ccc2nc(-c3ccc(O)c(F)c3)cc(Cl)c2c1
      6.481460
      6.609065
      330.019873
      246.00
    
    
      COc1cc(/C=C2\SC(=O)NC2=O)ccc1Oc1ccc(C#N)cc1C(F)(F)F
      5.159806
      5.657577
      6921.401522
      2200.00
    
    
      CCOC(=O)CSc1nn(C)c(=S)s1
      4.674710
      5.097820
      21149.000015
      7983.25



In [4]:

    
print(predictions_svr.shape)
predictions_svr.head()









    



(516, 4)






    Out[4]:






  
    
      
      ic50_predicted
      ic50_true
      ic50_predicted_nM
      ic50_true_nM
    
    
      smiles
      
      
      
      
    
  
  
    
      CCc1c(O)ccc2c1O[C@H](c1ccc(OCCN3CCCCC3)cc1)[C@H](c1ccc(O)cc1)S2
      7.867606
      8.327902
      13.564205
      4.70
    
    
      COc1c(Br)c(O)c(Br)c2nc(-c3ccc(O)c(F)c3)oc12
      5.748524
      6.252588
      1784.331355
      559.00
    
    
      Oc1ccc2nc(-c3ccc(O)c(F)c3)cc(Cl)c2c1
      6.004725
      6.609065
      989.178529
      246.00
    
    
      COc1cc(/C=C2\SC(=O)NC2=O)ccc1Oc1ccc(C#N)cc1C(F)(F)F
      5.301276
      5.657577
      4997.164427
      2200.00
    
    
      CCOC(=O)CSc1nn(C)c(=S)s1
      5.018956
      5.097820
      9572.899722
      7983.25



In [5]:

    
%matplotlib inline
from matplotlib import pyplot as plt

from scipy.stats import pearsonr

plt.rcParams["figure.figsize"] = [7, 7]
span = (1,12)
axes = plt.gca()
axes.set_xlim(span)
axes.set_ylim(span)

print('Pearson correlation:', pearsonr(predictions_svr.ic50_predicted, predictions_rf.ic50_predicted)[0])

plt.plot((span[0],span[1]), (span[0],span[1]), linestyle='--')
plt.scatter(
    predictions_svr.ic50_predicted
    , predictions_rf.ic50_predicted
    , c='blue'
    , s=20
)
plt.xlabel('SVR')
plt.ylabel('RF')









    



Pearson correlation: 0.946775296642






    Out[5]:





<matplotlib.text.Text at 0x7f4c088065f8>



In [6]:

    
threshold = MAX_ERROR

diffs_rf = ((predictions_rf.ic50_predicted_nM - predictions_rf.ic50_true_nM).abs() < threshold).tolist()
accurate_rf = predictions_rf[diffs_rf]

diffs_svr = ((predictions_svr.ic50_predicted_nM - predictions_svr.ic50_true_nM).abs() < threshold).tolist()
accurate_svr = predictions_svr[diffs_svr]

print(accurate_rf.shape, accurate_svr.shape)









    



(314, 4) (293, 4)



In [7]:

    
from pandas import merge

accurate_all = merge(accurate_rf, accurate_svr, suffixes=('_rf', '_svr'), left_index=True, right_index=True, how='inner')
print(accurate_all.shape)
accurate_all.head()









    



(276, 8)






    Out[7]:






  
    
      
      ic50_predicted_rf
      ic50_true_rf
      ic50_predicted_nM_rf
      ic50_true_nM_rf
      ic50_predicted_svr
      ic50_true_svr
      ic50_predicted_nM_svr
      ic50_true_nM_svr
    
    
      smiles
      
      
      
      
      
      
      
      
    
  
  
    
      CCc1c(O)ccc2c1O[C@H](c1ccc(OCCN3CCCCC3)cc1)[C@H](c1ccc(O)cc1)S2
      7.952289
      8.327902
      11.161194
      4.7
      7.867606
      8.327902
      13.564205
      4.7
    
    
      Oc1ccc2nc(-c3ccc(O)c(F)c3)cc(Cl)c2c1
      6.481460
      6.609065
      330.019873
      246.0
      6.004725
      6.609065
      989.178529
      246.0
    
    
      Cc1nc(CN2CCN(c3nc(NCCc4ccc(O)cc4)nc(N(C)CCCc4ccc(Cl)cc4)n3)CC2)cs1
      7.190932
      6.823909
      64.427056
      150.0
      6.885006
      6.823909
      130.314751
      150.0
    
    
      Oc1ccc([C@H]2Sc3cc(O)ccc3S[C@H]2c2ccc(OCCN3CCCC3)c(Br)c2)cc1
      8.717648
      8.397940
      1.915807
      4.0
      8.679498
      8.397940
      2.091714
      4.0
    
    
      O=C1c2c(Cl)cc(O)cc2O[C@H](c2ccc(OCCN3CCCCC3)cc2)[C@@H]1c1ccc(O)cc1
      7.565931
      7.236572
      27.168740
      58.0
      7.472004
      7.236572
      33.728433
      58.0



In [8]:

    
import numpy as np

differences = []
for x, y in zip(accurate_all.ic50_predicted_nM_rf, accurate_all.ic50_predicted_nM_svr):
    differences.append(abs(x - y))
differences = np.array(differences)
print('Agreement up to 200 nM: ', sum(differences < 200) / len(differences))
weights = np.ones_like(differences) / len(differences)
plt.hist(differences, weights=weights)
plt.show()









    



Agreement up to 200 nM:  0.768115942029



In [ ]:

	ic50_predicted	ic50_true	ic50_predicted_nM	ic50_true_nM
smiles
CCc1c(O)ccc2c1O[C@H](c1ccc(OCCN3CCCCC3)cc1)[C@H](c1ccc(O)cc1)S2	7.952289	8.327902	11.161194	4.70
COc1c(Br)c(O)c(Br)c2nc(-c3ccc(O)c(F)c3)oc12	5.705610	6.252588	1969.654174	559.00
Oc1ccc2nc(-c3ccc(O)c(F)c3)cc(Cl)c2c1	6.481460	6.609065	330.019873	246.00
COc1cc(/C=C2\SC(=O)NC2=O)ccc1Oc1ccc(C#N)cc1C(F)(F)F	5.159806	5.657577	6921.401522	2200.00
CCOC(=O)CSc1nn(C)c(=S)s1	4.674710	5.097820	21149.000015	7983.25

	ic50_predicted	ic50_true	ic50_predicted_nM	ic50_true_nM
smiles
CCc1c(O)ccc2c1O[C@H](c1ccc(OCCN3CCCCC3)cc1)[C@H](c1ccc(O)cc1)S2	7.867606	8.327902	13.564205	4.70
COc1c(Br)c(O)c(Br)c2nc(-c3ccc(O)c(F)c3)oc12	5.748524	6.252588	1784.331355	559.00
Oc1ccc2nc(-c3ccc(O)c(F)c3)cc(Cl)c2c1	6.004725	6.609065	989.178529	246.00
COc1cc(/C=C2\SC(=O)NC2=O)ccc1Oc1ccc(C#N)cc1C(F)(F)F	5.301276	5.657577	4997.164427	2200.00
CCOC(=O)CSc1nn(C)c(=S)s1	5.018956	5.097820	9572.899722	7983.25