notebook.community

Edit and run



In [1]:

    
import smurff
import matrix_io as mio
import numpy as np

# Load the training matrix from disk. The file name suggests the values were
# mean-centered beforehand -- TODO confirm how the file was produced.
train = mio.read_matrix("train_centered.sdm")
# Cell output: the overall mean of the matrix. In the recorded run it is ~5e-16,
# i.e. zero up to floating-point error.
np.mean(train)









    Out[1]:





4.996270558540488e-16



In [2]:

    
# Convert to CSR. Its `indptr` array marks where each row's stored entries begin,
# so consecutive differences give the number of stored entries per row.
train_csr = train.tocsr()
num_nonzeros = np.diff(train_csr.indptr)
# Inspect the count for row 2.
num_nonzeros[2]









    Out[2]:





0



In [3]:

    
# Row 2 seems empty: slice it out to confirm it holds no stored elements.
train_csr[2,:]









    Out[3]:





<1x526 sparse matrix of type '<class 'numpy.float64'>'
	with 0 stored elements in Compressed Sparse Row format>



In [4]:

    
# Open a prediction session from "root.ini" and print its summary.
# NOTE(review): presumably root.ini is the root file written by an earlier
# SMURFF training run -- confirm where it comes from.
sess = smurff.PredictSession("root.ini")
print(sess)









    



PredictSession with 9 samples
  Data shape = [622729, 526]
  Beta shape = [29413, 0]
  Num latent = 8



In [5]:

    
# Latent representation for compound 2: column 2 of the first latent matrix
# (latents[0]) in the first stored sample (samples[0]).
# NOTE(review): assumes latents[0] is laid out as (num_latent, num_compounds) -- confirm.
u_2 = sess.samples[0].latents[0][:,2]
u_2









    Out[5]:





array([ 1.30522453, -0.69799155, -1.63390551,  0.47954038, -1.05110849,
        1.50316563,  0.83644828, -0.45544372])



In [6]:

    
# Feature vector for compound 2: row 2 of the feature matrix, which is read
# from disk and converted to CSR.
ecfp = mio.read_matrix("feat_nonzeros.sdm").tocsr()
f_2 = ecfp[2,:]
# Show the shape of the extracted row.
f_2.shape









    Out[6]:





(1, 29413)



In [7]:

    
# Entry 0 of `betas` in the first stored sample.
# NOTE(review): presumably the link matrix mapping features to latent space -- confirm.
beta = sess.samples[0].betas[0]
beta.shape









    Out[7]:





(8, 29413)



In [8]:

    
# Predict U for compound 2 from its features alone (NOT adding the mean).
# `@` states the matrix product explicitly. With scipy.sparse *matrix* objects
# `*` happens to mean the same thing, but with sparse *array* objects (or plain
# ndarrays) `*` is element-wise, so the explicit operator keeps this cell
# correct regardless of which class the reader returns.
u_2_f1 = f_2 @ beta.T
u_2_f1









    Out[8]:





array([[ 0.61913943, -0.30052548, -0.22481842,  0.37239162,  0.09803664,
         0.32319267,  0.34434824, -0.39687207]])



In [9]:

    
# Mean of the first sample's latent matrix along axis 1. In the recorded run
# this yields 8 values, matching the session's "Num latent = 8".
# The shape is displayed alongside the values as a sanity check.
umean = np.mean(sess.samples[0].latents[0], axis=1)
umean, umean.shape









    Out[9]:





(array([-1.68405648e-04,  2.95383937e-02, -4.61079916e-01, -2.99911753e-01,
        -7.19341866e-01,  9.13175546e-01,  6.47010994e-01, -4.70864672e-02]),
 (8,))



In [10]:

    
# Predict U for compound 2 from its features (YES adding the mean).
# `@` states the matrix product explicitly. With scipy.sparse *matrix* objects
# `*` happens to mean the same thing, but with sparse *array* objects (or plain
# ndarrays) `*` is element-wise. `@` binds tighter than `+`, so the mean is
# still added to the finished product.
u_2_f2 = f_2 @ beta.T + umean
u_2_f2









    Out[10]:





array([[ 0.61897103, -0.27098709, -0.68589834,  0.07247986, -0.62130522,
         1.23636822,  0.99135923, -0.44395854]])



In [11]:

    
# Difference between the stored latent vector for compound 2 and the
# feature-based prediction with the mean added. u_2 is 1-D and u_2_f2 is 2-D,
# so broadcasting makes the result 2-D as well.
u_2 - u_2_f2









    Out[11]:





array([[ 0.6862535 , -0.42700447, -0.94800717,  0.40706052, -0.42980327,
         0.26679741, -0.15491095, -0.01148518]])



In [12]:

    
# Predict with the feature row f_2 as the first operand and index 0 as the second.
# NOTE(review): presumably this predicts column 0 for compound 2 from its
# features -- confirm against the SMURFF PredictSession API.
Pf = sess.predict([f_2,0])
# Average the returned predictions (presumably one per stored sample -- confirm).
np.mean(Pf)









    Out[12]:





2.5946549137201806



In [13]:

    
# Predict with the first operand given as index 2 (not a feature vector) and
# the second as index 0.
# NOTE(review): presumably this uses the stored latent vector of compound 2 -- confirm.
Pu = sess.predict([2,0])
# Average the returned predictions (presumably one per stored sample -- confirm).
np.mean(Pu)









    Out[13]:





2.8432540752024207



In [ ]: