In [ ]:
## skip, if data is already downloaded
!wget --no-verbose http://homes.esat.kuleuven.be/~jsimm/chembl-IC50-346targets.mm
!wget --no-verbose http://homes.esat.kuleuven.be/~jsimm/chembl-IC50-compound-feat.mm
In [ ]:
# macau provides the factorization routine called below;
# scipy.io provides mmread for the .mm data files
import macau
import scipy.io
# last expression of the cell: the notebook displays the installed macau version
macau.__version__
In [ ]:
## loading data
## both files are read with scipy's mmread from the working directory:
##   ic50 - the matrix that is factorized below (passed as Y)
##   ecfp - the side information passed for the first mode of Y
##          (presumably compound fingerprints - confirm against the data source)
ic50, ecfp = (
    scipy.io.mmread("chembl-IC50-346targets.mm"),
    scipy.io.mmread("chembl-IC50-compound-feat.mm"),
)
In [ ]:
## running factorization (Macau)
## NOTE(review): the per-argument notes follow the Macau package documentation;
## confirm them against the installed version (see macau.__version__ above).
result = macau.macau(
    Y=ic50,              # matrix to factorize
    Ytest=0.2,           # presumably the fraction of Y held out for testing
    side=[ecfp, None],   # side information per mode: ecfp for the first, none for the second
    num_latent=32,       # presumably the number of latent dimensions
    precision=5.0,       # presumably the precision assumed for the observations
    burnin=40,           # presumably samples discarded before collecting
    nsamples=160,        # presumably samples collected after burn-in
    univariate=True,     # presumably selects the univariate sampler
)
In [ ]:
# show the rmse_test value of the run (presumably RMSE on the held-out Ytest entries)
result.rmse_test
In [ ]:
# preview the prediction table; the plots below read its y, y_pred and col columns
result.prediction.head()
In [ ]:
# draw matplotlib figures inside the notebook output
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
## measured vs. predicted values for every row of result.prediction
plt.plot(result.prediction.y, result.prediction.y_pred, ".", alpha=.4)
plt.title('pIC50s for all proteins')
plt.xlabel("Measured pIC50")
# the trailing ";" must sit on the cell's last statement to stop the
# notebook from echoing the Text object returned by ylabel
plt.ylabel("Predicted pIC50");
In [ ]:
## per protein plot
## change pid to inspect another protein; rows are selected on the "col" column
pid = 9
prot = result.prediction.query("col == %d" % pid)
plt.plot(prot.y, prot.y_pred, ".", alpha=.4)
plt.title('pIC50s for protein %d' % pid)
plt.xlabel("Measured pIC50")
# the trailing ";" must sit on the cell's last statement to stop the
# notebook from echoing the Text object returned by ylabel
plt.ylabel("Predicted pIC50");
In [ ]: