Downloading data


In [ ]:
## skip, if data is already downloaded
!wget --no-verbose http://homes.esat.kuleuven.be/~jsimm/chembl-IC50-346targets.mm
!wget --no-verbose http://homes.esat.kuleuven.be/~jsimm/chembl-IC50-compound-feat.mm

Running Macau on ChEMBL data


In [ ]:
import macau
import scipy.io
## display the installed macau version so the run can be reproduced later
macau.__version__

In [ ]:
## read the two Matrix Market files fetched by the download cell, in order:
## first the IC50 matrix, then the compound feature matrix
ic50, ecfp = (
    scipy.io.mmread(mm_file)
    for mm_file in ("chembl-IC50-346targets.mm",
                    "chembl-IC50-compound-feat.mm")
)

In [ ]:
## Macau factorization of `ic50`; `ecfp` is passed as the first `side` entry
## and the second entry is None.
## NOTE(review): Ytest=0.2 is presumably the fraction of Y held out as a test
## set -- confirm against the macau documentation.
result = macau.macau(
    Y=ic50,
    Ytest=0.2,
    side=[ecfp, None],
    num_latent=32,
    precision=5.0,
    burnin=40,
    nsamples=160,
    univariate=True,
)

In [ ]:
## display the `rmse_test` attribute of the Macau result
## (presumably the RMSE on the Ytest hold-out -- confirm)
result.rmse_test

In [ ]:
## preview the first rows of the prediction table; its `y`, `y_pred` and `col`
## columns are what the plotting cells below use
result.prediction.head()

Plotting results


In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt

In [ ]:
## measured vs. predicted values for every row of result.prediction
fig, ax = plt.subplots()
ax.plot(result.prediction.y, result.prediction.y_pred, ".", alpha=.4)
ax.set_title('pIC50s for all proteins')
ax.set_xlabel("Measured pIC50")
## the ';' must end the LAST statement of the cell to hide the Text(...) repr
ax.set_ylabel("Predicted pIC50");

In [ ]:
## per protein plot: same scatter, restricted to the rows whose `col` equals pid
pid = 9
prot = result.prediction.query("col == %d" % pid)
fig, ax = plt.subplots()
ax.plot(prot.y, prot.y_pred, ".", alpha=.4)
ax.set_title('pIC50s for protein %d' % pid)
ax.set_xlabel("Measured pIC50")
## the ';' must end the LAST statement of the cell to hide the Text(...) repr
ax.set_ylabel("Predicted pIC50");

In [ ]: