In [1]:
import pickle
# Upper bound on |predicted - true| (compared against the *_nM columns further
# down) for a prediction to be kept as "accurate".
MAX_ERROR = 1000

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load these files if they come from a trusted source.
# The context managers make sure both file handles are closed after loading.
with open('validation_set_predictions_svr.pickle', 'rb') as svr_file:
    predictions_svr = pickle.load(svr_file)
with open('validation_set_predictions_rf.pickle', 'rb') as rf_file:
    predictions_rf = pickle.load(rf_file)

# Displayed as the cell output: shapes of the two loaded prediction tables.
predictions_svr.shape, predictions_rf.shape
Out[1]:
In [2]:
import numpy as np
def convert_to_conc(column):
    """Return ``10 ** (-x) * 1e9`` for every element ``x`` of ``column``.

    NOTE(review): the call sites store the result in ``*_nM`` columns, so the
    input is presumably a negative log10 of a molar concentration (pIC50) and
    the output a concentration in nM -- confirm against the data source.

    Parameters
    ----------
    column : pandas.Series (or any array-like supporting unary minus)
        Values to convert. Integer dtypes are accepted: the float base avoids
        NumPy's "integers to negative integer powers" error.

    Returns
    -------
    Same container type as ``column`` (a Series keeps its index and name),
    holding the converted float values.
    """
    # Vectorised: one ufunc call over the whole column instead of a Python
    # lambda invoked per element.
    return np.power(10.0, -column) * 1.0e9
# Derive the `*_nM` companion of each IC50 column, for both model tables.
# Order matters only for the resulting column order: predicted first, then true.
for frame in (predictions_svr, predictions_rf):
    for source_name, target_name in (
        ('ic50_predicted', 'ic50_predicted_nM'),
        ('ic50_true', 'ic50_true_nM'),
    ):
        frame[target_name] = convert_to_conc(frame[source_name])
In [3]:
# Show the RF table's shape before its index is replaced.
print(predictions_rf.shape)
# Give the RF table the SVR table's index so that the later index-based merge
# pairs the two tables row by row.
# NOTE(review): this assumes both pickles hold the same rows in the same
# order -- confirm; a length mismatch raises, but a reordering would not.
predictions_rf.index = predictions_svr.index
# Cell output: first rows of the re-indexed RF table.
predictions_rf.head()
Out[3]:
In [4]:
# Shape, then the first five rows, of the SVR table.
svr_shape = predictions_svr.shape
print(svr_shape)
predictions_svr.head(n=5)
Out[4]:
In [5]:
%matplotlib inline
from matplotlib import pyplot as plt
from scipy.stats import pearsonr
# Square figures for this and every later plot in the session.
plt.rcParams["figure.figsize"] = [7, 7]

# Both axes share the same value range so the diagonal means "models agree".
span = (1, 12)
axes = plt.gca()
axes.set_xlim(span)
axes.set_ylim(span)

# Linear correlation between the two models' predictions (first element of
# the pearsonr result is the coefficient).
correlation = pearsonr(predictions_svr.ic50_predicted, predictions_rf.ic50_predicted)
print('Pearson correlation:', correlation[0])

# Dashed y = x reference line across the whole span.
axes.plot(span, span, linestyle='--')
# One point per row: SVR prediction on x, RF prediction on y.
plt.scatter(predictions_svr.ic50_predicted, predictions_rf.ic50_predicted, c='blue', s=20)
axes.set_xlabel('SVR')
axes.set_ylabel('RF')
Out[5]:
In [6]:
threshold = MAX_ERROR


def _error_below(frame, limit):
    """Return a list of booleans: True where |predicted_nM - true_nM| < limit."""
    absolute_error = (frame.ic50_predicted_nM - frame.ic50_true_nM).abs()
    return (absolute_error < limit).tolist()


# Keep, per model, only the rows whose prediction error is under the threshold.
diffs_rf = _error_below(predictions_rf, threshold)
accurate_rf = predictions_rf[diffs_rf]
diffs_svr = _error_below(predictions_svr, threshold)
accurate_svr = predictions_svr[diffs_svr]
print(accurate_rf.shape, accurate_svr.shape)
In [7]:
from pandas import merge
# Rows that passed the accuracy filter for BOTH models, joined on the shared
# index; overlapping column names get `_rf` / `_svr` suffixes.
accurate_all = accurate_rf.merge(
    accurate_svr,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_rf', '_svr'),
)
print(accurate_all.shape)
accurate_all.head()
Out[7]:
In [8]:
import numpy as np
# Two predictions count as "in agreement" when they differ by less than this.
AGREEMENT_THRESHOLD_NM = 200

# Absolute RF-vs-SVR gap per row, computed positionally on the raw arrays
# (same pairing as zipping the two columns, without the Python-level loop).
differences = np.abs(
    accurate_all.ic50_predicted_nM_rf.values - accurate_all.ic50_predicted_nM_svr.values
)

label = 'Agreement up to {} nM: '.format(AGREEMENT_THRESHOLD_NM)
if differences.size:
    # Mean of a boolean mask == fraction of rows under the threshold.
    print(label, (differences < AGREEMENT_THRESHOLD_NM).mean())
    # Equal weights summing to 1 turn the bar heights into fractions of rows.
    weights = np.ones_like(differences) / differences.size
    plt.hist(differences, weights=weights)
    plt.xlabel('|RF - SVR| predicted IC50 (nM)')
    plt.ylabel('Fraction of rows')
    plt.show()
else:
    # No row passed both accuracy filters: the ratio is undefined, so report
    # that instead of dividing by zero.
    print(label, 'n/a (no rows are accurate for both models)')
In [ ]: