In [1]:
import smurff
import matrix_io as mio
import numpy as np

# Load the training matrix (the file name suggests it was already centered)
# and show its overall mean as a sanity check.
train = mio.read_matrix("train_centered.sdm")
train.mean()


Out[1]:
4.996270558540488e-16

In [2]:
# Convert to CSR and count the stored entries of every row, then look at
# the count for row 2.
train_csr = train.tocsr()
num_nonzeros = train_csr.getnnz(axis=1)
num_nonzeros[2]


Out[2]:
0

In [3]:
# Row 2 seems empty: slice it out of the CSR matrix to confirm that it has
# no stored elements.
train_csr[2,:]


Out[3]:
<1x526 sparse matrix of type '<class 'numpy.float64'>'
	with 0 stored elements in Compressed Sparse Row format>

In [4]:
# Build a PredictSession from root.ini and print its summary
# (number of samples, data shape, beta shape, number of latent dimensions).
sess = smurff.PredictSession("root.ini")
print(sess)


PredictSession with 9 samples
  Data shape = [622729, 526]
  Beta shape = [29413, 0]
  Num latent = 8

In [5]:
# Latent representation for compound 2: column 2 of the first latent matrix
# held by the first sample of the session.
u_2 = sess.samples[0].latents[0][:,2]
u_2


Out[5]:
array([ 1.30522453, -0.69799155, -1.63390551,  0.47954038, -1.05110849,
        1.50316563,  0.83644828, -0.45544372])

In [6]:
# Feature vector for compound 2: read the feature matrix, convert it to CSR,
# and take row 2 (kept as a 1 x n_features sparse row). The shape is shown
# to check the number of features.
ecfp = mio.read_matrix("feat_nonzeros.sdm").tocsr()
f_2 = ecfp[2,:]
f_2.shape


Out[6]:
(1, 29413)

In [7]:
# First beta matrix of the first sample; its shape is displayed so it can be
# compared with the feature-vector width above.
# NOTE(review): presumably this maps features to latent dimensions — confirm
# against the SMURFF documentation.
beta = sess.samples[0].betas[0]
beta.shape


Out[7]:
(8, 29413)

In [8]:
# Predict the latent vector of compound 2 from its features alone
# (the latent mean is NOT added here).
# The explicit matrix-product operator `@` is used instead of `*`: with a
# scipy.sparse matrix both give the same product, but `*` is element-wise
# for scipy sparse arrays, so `@` keeps this cell correct for either type.
u_2_f1 = f_2 @ beta.T
u_2_f1


Out[8]:
array([[ 0.61913943, -0.30052548, -0.22481842,  0.37239162,  0.09803664,
         0.32319267,  0.34434824, -0.39687207]])

In [9]:
# Empirical mean of the first sample's first latent matrix, averaged along
# axis 1, which yields one value per latent dimension.
# NOTE(review): this is an average computed from the latent matrix itself; it
# may differ from any latent mean stored by the model — confirm which one
# predict() uses when side information is supplied.
umean = sess.samples[0].latents[0].mean(axis=1)
umean, umean.shape


Out[9]:
(array([-1.68405648e-04,  2.95383937e-02, -4.61079916e-01, -2.99911753e-01,
        -7.19341866e-01,  9.13175546e-01,  6.47010994e-01, -4.70864672e-02]),
 (8,))

In [10]:
# Predict the latent vector of compound 2 from its features and add the
# per-dimension latent mean (umean) on top.
# The explicit matrix-product operator `@` is used instead of `*`: with a
# scipy.sparse matrix both give the same product, but `*` is element-wise
# for scipy sparse arrays, so `@` keeps this cell correct for either type.
u_2_f2 = f_2 @ beta.T + umean
u_2_f2


Out[10]:
array([[ 0.61897103, -0.27098709, -0.68589834,  0.07247986, -0.62130522,
         1.23636822,  0.99135923, -0.44395854]])

In [11]:
# Residual between the sampled latent vector of compound 2 and the vector
# predicted from its features plus the mean. u_2 is 1-D (8 values) and
# u_2_f2 is 1 x 8, so the difference broadcasts to a 1 x 8 array.
u_2 - u_2_f2


Out[11]:
array([[ 0.6862535 , -0.42700447, -0.94800717,  0.40706052, -0.42980327,
         0.26679741, -0.15491095, -0.01148518]])

In [12]:
# Predict with the feature vector of compound 2 supplied in place of an index
# for the first coordinate (second coordinate is 0), then average everything
# that predict() returned.
# NOTE(review): presumably predict() returns one prediction per stored
# sample — confirm against the SMURFF documentation.
Pf = sess.predict([f_2,0])
np.mean(Pf)


Out[12]:
2.5946549137201806

In [13]:
# Same prediction, but addressing compound 2 by its index (coordinates
# [2, 0]) so that the stored latent vector is used instead of the features;
# the result is averaged for comparison with the feature-based prediction.
# NOTE(review): presumably predict() returns one prediction per stored
# sample — confirm against the SMURFF documentation.
Pu = sess.predict([2,0])
np.mean(Pu)


Out[13]:
2.8432540752024207

In [ ]: