In [ ]:
import time
import h5py
import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as la
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
path_dataset = '../../datasets/douban/training_test_dataset.mat'  # Douban variant with the user social network only
In [ ]:
# auxiliary functions:
# import MATLAB files in python
def load_matlab_file(path_file, name_field):
    """
    load '.mat' files
    inputs:
        path_file, string containing the file path
        name_field, string containing the field name
    warning:
        '.mat' files should be saved in the '-v7.3' format
    """
    db = h5py.File(path_file, 'r')
    ds = db[name_field]
    try:
        if 'ir' in ds.keys():
            # sparse matrix: MATLAB stores it in CSC form (values, row indices, column pointers)
            data = np.asarray(ds['data'])
            ir = np.asarray(ds['ir'])
            jc = np.asarray(ds['jc'])
            out = sp.csc_matrix((data, ir, jc)).astype(np.float32)
    except AttributeError:
        # dense matrix: ds.keys() raises AttributeError; transpose because of the
        # row- vs column-major ordering between python and matlab
        out = np.asarray(ds).astype(np.float32).T
    db.close()
    return out
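In [ ]:
# Minimal sketch (not part of the original pipeline) of what the sparse branch
# above rebuilds: MATLAB v7.3 stores a sparse matrix as (data, ir, jc), which
# is exactly scipy's CSC triplet (values, row indices, column pointers).
data = np.array([1.0, 2.0, 3.0])
ir = np.array([0, 0, 1])   # row index of each stored value
jc = np.array([0, 1, 3])   # column k holds the values data[jc[k]:jc[k+1]]
toy = sp.csc_matrix((data, ir, jc))
print(toy.toarray())       # expected: [[1. 2.] [0. 3.]]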
In [ ]:
#loading of the required matrices
M = load_matlab_file(path_dataset, 'M')
Otraining = load_matlab_file(path_dataset, 'Otraining')
Otest = load_matlab_file(path_dataset, 'Otest')
Wrow = load_matlab_file(path_dataset, 'W_users') #dense
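In [ ]:
# optional sanity check (a sketch, not in the original pipeline): inspect the
# matrix shapes and the fraction of observed training ratings
print('M:', M.shape, ' Wrow:', Wrow.shape)
print('density of observed training ratings: %f' % (np.sum(Otraining) / float(np.prod(M.shape))))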
In [ ]:
np.random.seed(0)
pos_tr_samples = np.where(Otraining)
num_tr_samples = len(pos_tr_samples[0])
list_idx = list(range(num_tr_samples))
np.random.shuffle(list_idx)
idx_data = list_idx[:num_tr_samples//2]
idx_train = list_idx[num_tr_samples//2:]
pos_data_samples = (pos_tr_samples[0][idx_data], pos_tr_samples[1][idx_data])
pos_tr_samples = (pos_tr_samples[0][idx_train], pos_tr_samples[1][idx_train])

# split the observed ratings into two disjoint halves: Odata feeds the
# initialization/input side, Otraining supervises the loss
Odata = np.zeros(M.shape)
Otraining = np.zeros(M.shape)
for k in range(len(pos_data_samples[0])):
    Odata[pos_data_samples[0][k], pos_data_samples[1][k]] = 1
for k in range(len(pos_tr_samples[0])):
    Otraining[pos_tr_samples[0][k], pos_tr_samples[1][k]] = 1

print('Num data samples: %d' % (np.sum(Odata),))
print('Num train samples: %d' % (np.sum(Otraining),))
print('Num train+data samples: %d' % (np.sum(Odata + Otraining),))
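In [ ]:
# quick check (a sketch, not in the original pipeline): the two halves of the
# training split should be disjoint masks
assert not np.any(Odata * Otraining), 'splits overlap'
print('splits are disjoint; total observed = %d' % int(np.sum(Odata + Otraining)))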
In [ ]:
# computation of the normalized Laplacian of the user graph
Lrow = sp.csgraph.laplacian(Wrow, normed=True)
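In [ ]:
# sanity sketch (optional): the normalized Laplacian has its spectrum in
# [0, 2], which is why the model below shifts it by -I to get eigenvalues in
# [-1, 1], the natural domain of the Chebyshev polynomials
lmax = la.eigsh(sp.csr_matrix(Lrow), k=1, which='LA', return_eigenvectors=False)[0]
print('largest Laplacian eigenvalue: %f' % lmax)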
In [ ]:
# apply SVD initially to detect the main components for our initialization
# (np.linalg.svd returns V transposed, hence the name Vh)
U, s, Vh = np.linalg.svd(Odata * M, full_matrices=False)
print(U.shape)
print(s.shape)
print(Vh.shape)
In [ ]:
rank_W_H = 10
partial_s = s[:rank_W_H]
partial_S_sqrt = np.diag(np.sqrt(partial_s))
# split sqrt(S) between the two factors so that X ~ W H^T
initial_W = np.dot(U[:, :rank_W_H], partial_S_sqrt)
initial_H = np.dot(partial_S_sqrt, Vh[:rank_W_H, :]).T
print(initial_W.shape)
print(initial_H.shape)
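In [ ]:
# sanity sketch: splitting the singular values as sqrt(S) * sqrt(S) means
# W H^T reproduces the best rank-10 approximation of Odata*M exactly
approx = np.dot(initial_W, initial_H.T)
trunc = np.dot(U[:, :rank_W_H] * s[:rank_W_H], Vh[:rank_W_H, :])
print('max abs difference: %e' % np.max(np.abs(approx - trunc)))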
In [ ]:
print('Original training matrix')
plt.figure()
plt.imshow(Odata * M)
plt.colorbar()
print('Reconstructed training matrix')
plt.figure()
plt.imshow(np.dot(initial_W, initial_H.T))
plt.colorbar()
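In [ ]:
# background sketch for the class below: compute_cheb_polynomials uses the
# Chebyshev recurrence T_0(x) = 1, T_1(x) = x, T_k(x) = 2x T_{k-1}(x) - T_{k-2}(x);
# for a scalar x in [-1, 1] this matches the closed form cos(k*arccos(x))
x = 0.3
T = [1.0, x]
for k in range(2, 5):
    T.append(2 * x * T[-1] - T[-2])
for k, t in enumerate(T):
    print(k, t, np.cos(k * np.arccos(x)))  # the two columns should agree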
In [ ]:
class Train_test_matrix_completion:
    """
    The neural network model: the user factor W is diffused over the user
    (social) graph with Chebyshev spectral filters and refined by an LSTM,
    while the item factor H is learned directly by the optimizer.
    """

    def frobenius_norm(self, tensor):
        square_tensor = tf.square(tensor)
        tensor_sum = tf.reduce_sum(square_tensor)
        frobenius_norm = tf.sqrt(tensor_sum)
        return frobenius_norm

    def mono_conv(self, list_lap, ord_conv, A, W, b):
        feat = []
        # collect features: one graph diffusion of A per Chebyshev polynomial
        for k in range(ord_conv):
            c_lap = list_lap[k]
            # dense implementation
            c_feat = tf.matmul(c_lap, A, a_is_sparse=False)
            feat.append(c_feat)
        all_feat = tf.concat(feat, 1)
        conv_feat = tf.matmul(all_feat, W) + b
        conv_feat = tf.nn.relu(conv_feat)
        return conv_feat

    def compute_cheb_polynomials(self, L, ord_cheb, list_cheb):
        # Chebyshev recurrence: T_0 = I, T_1 = L, T_k = 2 L T_{k-1} - T_{k-2}
        for k in range(ord_cheb):
            if k == 0:
                list_cheb.append(tf.cast(tf.diag(tf.ones([tf.shape(L)[0], ])), 'float32'))
            elif k == 1:
                list_cheb.append(tf.cast(L, 'float32'))
            else:
                list_cheb.append(2 * tf.matmul(L, list_cheb[k - 1]) - list_cheb[k - 2])

    def __init__(self, M, Lr, Odata, Otraining, Otest, initial_W, initial_H,
                 order_chebyshev_row=5, num_iterations=10,
                 gamma=1.0, gamma_H=1.0, gamma_W=1.0,
                 learning_rate=1e-4, idx_gpu='/gpu:1'):
        # order of the spectral filters
        self.ord_row = order_chebyshev_row
        self.num_iterations = num_iterations
        self.n_conv_feat = 32

        with tf.Graph().as_default() as g:
            tf.logging.set_verbosity(tf.logging.ERROR)
            self.graph = g
            tf.set_random_seed(0)
            with tf.device(idx_gpu):
                # loading of the Laplacian, shifted by -I so that its spectrum
                # lies in [-1, 1], the domain of the Chebyshev polynomials
                self.Lr = tf.cast(Lr, 'float32')
                self.norm_Lr = self.Lr - tf.diag(tf.ones([Lr.shape[0], ]))

                # compute all Chebyshev polynomials a priori
                self.list_row_cheb_pol = list()
                self.compute_cheb_polynomials(self.norm_Lr, self.ord_row, self.list_row_cheb_pol)

                # definition of constant matrices
                self.M = tf.constant(M, dtype=tf.float32)
                self.Odata = tf.constant(Odata, dtype=tf.float32)
                self.Otraining = tf.constant(Otraining, dtype=tf.float32)  # training mask
                self.Otest = tf.constant(Otest, dtype=tf.float32)  # test mask

                ################### definition of the NN variables ###################
                # weights for extracting the global features
                self.W_conv_W = tf.get_variable("W_conv_W", shape=[self.ord_row * initial_W.shape[1], self.n_conv_feat],
                                                initializer=tf.contrib.layers.xavier_initializer())
                self.b_conv_W = tf.Variable(tf.zeros([self.n_conv_feat, ]))

                # recurrent NN (LSTM) parameters
                self.W_f_u = tf.get_variable("W_f_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.W_i_u = tf.get_variable("W_i_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.W_o_u = tf.get_variable("W_o_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.W_c_u = tf.get_variable("W_c_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.U_f_u = tf.get_variable("U_f_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.U_i_u = tf.get_variable("U_i_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.U_o_u = tf.get_variable("U_o_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.U_c_u = tf.get_variable("U_c_u", shape=[self.n_conv_feat, self.n_conv_feat], initializer=tf.contrib.layers.xavier_initializer())
                self.b_f_u = tf.Variable(tf.zeros([self.n_conv_feat, ]))
                self.b_i_u = tf.Variable(tf.zeros([self.n_conv_feat, ]))
                self.b_o_u = tf.Variable(tf.zeros([self.n_conv_feat, ]))
                self.b_c_u = tf.Variable(tf.zeros([self.n_conv_feat, ]))

                # output parameters
                self.W_out_W = tf.get_variable("W_out_W", shape=[self.n_conv_feat, initial_W.shape[1]],
                                               initializer=tf.contrib.layers.xavier_initializer())
                self.b_out_W = tf.Variable(tf.zeros([initial_W.shape[1], ]))

                ######### definition of the NN
                # definition of W and H: W starts as a constant and is refined
                # additively by the RNN below, H is a free trainable variable
                self.W = tf.constant(initial_W.astype('float32'))
                self.H = tf.Variable(initial_H.astype('float32'))
                self.X = tf.matmul(self.W, self.H, transpose_b=True)  # we may initialize it at random here
                self.list_X = list()
                self.list_X.append(tf.identity(self.X))

                # RNN state
                self.h_u = tf.zeros([M.shape[0], self.n_conv_feat])
                self.c_u = tf.zeros([M.shape[0], self.n_conv_feat])

                for k in range(self.num_iterations):
                    # extraction of global feature vectors
                    self.final_feat_users = self.mono_conv(self.list_row_cheb_pol, self.ord_row, self.W, self.W_conv_W, self.b_conv_W)

                    # users RNN (in the full model the features would be split
                    # between users and movies LSTMs; this variant keeps only the users one)
                    self.f_u = tf.sigmoid(tf.matmul(self.final_feat_users, self.W_f_u) + tf.matmul(self.h_u, self.U_f_u) + self.b_f_u)
                    self.i_u = tf.sigmoid(tf.matmul(self.final_feat_users, self.W_i_u) + tf.matmul(self.h_u, self.U_i_u) + self.b_i_u)
                    self.o_u = tf.sigmoid(tf.matmul(self.final_feat_users, self.W_o_u) + tf.matmul(self.h_u, self.U_o_u) + self.b_o_u)
                    self.update_c_u = tf.sigmoid(tf.matmul(self.final_feat_users, self.W_c_u) + tf.matmul(self.h_u, self.U_c_u) + self.b_c_u)
                    self.c_u = tf.multiply(self.f_u, self.c_u) + tf.multiply(self.i_u, self.update_c_u)
                    self.h_u = tf.multiply(self.o_u, tf.sigmoid(self.c_u))

                    # incremental update of the factor W and of the matrix X
                    self.delta_W = tf.tanh(tf.matmul(self.c_u, self.W_out_W) + self.b_out_W)  # N x rank_W_H
                    self.W += self.delta_W
                    self.X = tf.matmul(self.W, self.H, transpose_b=True)
                    self.list_X.append(tf.identity(tf.reshape(self.X, [tf.shape(self.M)[0], tf.shape(self.M)[1]])))

                ######### loss definition
                # accuracy term: rescale X to the rating range [1, 5], then
                # measure the masked reconstruction error
                self.norm_X = 1 + 4 * (self.X - tf.reduce_min(self.X)) / (tf.reduce_max(self.X - tf.reduce_min(self.X)))
                frob_tensor = tf.multiply(self.Otraining + self.Odata, self.norm_X - self.M)
                self.loss_frob = tf.square(self.frobenius_norm(frob_tensor)) / np.sum(Otraining + Odata)

                # regularization terms: smoothness of X on the user graph and
                # Frobenius norms of the factors
                trace_row_tensor = tf.matmul(tf.matmul(self.X, self.Lr, transpose_a=True), self.X)
                self.loss_trace_row = tf.trace(trace_row_tensor) / tf.cast(tf.shape(self.X)[0] * tf.shape(self.X)[1], 'float32')
                self.frob_norm_H = tf.square(self.frobenius_norm(self.H)) / tf.cast(tf.shape(self.H)[0] * tf.shape(self.H)[1], 'float32')
                self.frob_norm_W = tf.square(self.frobenius_norm(self.W)) / tf.cast(tf.shape(self.W)[0] * tf.shape(self.W)[1], 'float32')

                # training loss definition
                self.loss = self.loss_frob + (gamma / 2) * self.loss_trace_row + (gamma_H / 2) * self.frob_norm_H + (gamma_W / 2) * self.frob_norm_W

                # test loss definition
                self.predictions = tf.multiply(self.Otest, self.norm_X - self.M)
                self.predictions_error = self.frobenius_norm(self.predictions)

                # definition of the solver
                self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)
                self.var_grad = tf.gradients(self.loss, tf.trainable_variables())
                self.norm_grad = self.frobenius_norm(tf.concat([tf.reshape(grad, [-1]) for grad in self.var_grad], 0))

                # create a session for running Ops on the Graph
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.allow_growth = True
                self.session = tf.Session(config=config)

                # run the Op to initialize the variables
                init = tf.global_variables_initializer()
                self.session.run(init)
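In [ ]:
# illustrative sketch (a NumPy mirror of one users-LSTM step above, with
# made-up shapes, not part of the model): note the model applies sigmoid both
# to the candidate cell update and around c_u, where a textbook LSTM uses tanh
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

n, d = 4, 3  # hypothetical numbers of users and hidden features
rng = np.random.RandomState(0)
x_t, h, c = rng.randn(n, d), np.zeros((n, d)), np.zeros((n, d))
Wg = {gate: rng.randn(d, d) * 0.1 for gate in 'fioc'}
Ug = {gate: rng.randn(d, d) * 0.1 for gate in 'fioc'}
f = sigmoid(x_t.dot(Wg['f']) + h.dot(Ug['f']))   # forget gate
i = sigmoid(x_t.dot(Wg['i']) + h.dot(Ug['i']))   # input gate
o = sigmoid(x_t.dot(Wg['o']) + h.dot(Ug['o']))   # output gate
c = f * c + i * sigmoid(x_t.dot(Wg['c']) + h.dot(Ug['c']))  # sigmoid, as in the model
h = o * sigmoid(c)
print(h.shape)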
In [ ]:
# this variant uses a single (user) graph, so only the row filter order is needed
ord_row = 5
learning_obj = Train_test_matrix_completion(M, Lrow, Odata, Otraining, Otest,
                                            initial_W, initial_H,
                                            order_chebyshev_row=ord_row,
                                            gamma=1e2, gamma_H=1e2, gamma_W=1e0,
                                            learning_rate=1e-3)
num_iter_test = 10
num_total_iter_training = 5000
num_iter = 0
list_training_loss = list()
list_training_norm_grad = list()
list_test_pred_error = list()
list_predictions = list()
list_X = list()
list_training_times = list()
list_test_times = list()
list_grad_X = list()
list_X_evolutions = list()
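In [ ]:
# optional sketch: count the trainable parameters of the model just built
# (runs inside the object's graph; TF 1.x API, as in the rest of the notebook)
with learning_obj.graph.as_default():
    n_params = int(np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))
print('trainable parameters: %d' % n_params)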
In [ ]:
num_iter = 0
for k in range(num_iter, num_total_iter_training):
    tic = time.time()
    _, current_training_loss, norm_grad, X_grad = learning_obj.session.run([learning_obj.optimizer, learning_obj.loss,
                                                                            learning_obj.norm_grad, learning_obj.var_grad])
    training_time = time.time() - tic

    list_training_loss.append(current_training_loss)
    list_training_norm_grad.append(norm_grad)
    list_training_times.append(training_time)

    if np.mod(num_iter, num_iter_test) == 0:
        msg = "[TRN] iter = %03i, cost = %3.2e, |grad| = %.2e (%3.2es)" \
              % (num_iter, list_training_loss[-1], list_training_norm_grad[-1], training_time)
        print(msg)

        # test code
        tic = time.time()
        pred_error, preds, X = learning_obj.session.run([learning_obj.predictions_error, learning_obj.predictions,
                                                         learning_obj.norm_X])
        test_time = time.time() - tic

        list_test_pred_error.append(pred_error)
        list_test_times.append(test_time)
        RMSE = np.sqrt(np.square(pred_error) / np.sum(Otest))
        msg = "[TST] iter = %03i, cost = %3.2e, RMSE = %3.2e (%3.2es)" % (num_iter, list_test_pred_error[-1], RMSE, test_time)
        print(msg)
    num_iter += 1
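In [ ]:
# the RMSE printed above follows from the masked Frobenius error:
# RMSE = sqrt( ||Otest * (X_hat - M)||_F^2 / #test entries ); a direct
# recomputation from the last evaluated X, as a sketch:
rmse_direct = np.sqrt(np.sum(np.square(Otest * (X - M))) / np.sum(Otest))
print('RMSE recomputed directly: %f' % rmse_direct)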
In [ ]:
fig, ax1 = plt.subplots(figsize=(20, 10))
ax2 = ax1.twinx()
ax1.plot(np.arange(len(list_training_loss)), list_training_loss, 'g-')
ax2.plot(np.arange(len(list_test_pred_error)) * num_iter_test, list_test_pred_error, 'b-')
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Training loss', color='g')
ax2.set_ylabel('Test loss', color='b')

# select the test checkpoint logged nearest the minimum training loss
best_iter = (np.argmin(list_training_loss) // num_iter_test) * num_iter_test
best_pred_error = list_test_pred_error[best_iter // num_iter_test]
print('Best predictions at iter: %d (error: %f)' % (best_iter, best_pred_error))
RMSE = np.sqrt(np.square(best_pred_error) / np.sum(Otest))
print('RMSE: %f' % RMSE)
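In [ ]:
# note: the 'best' iteration above is selected by training loss; as a sketch,
# the best test RMSE over all logged checkpoints, for comparison
test_rmse = np.sqrt(np.square(np.asarray(list_test_pred_error)) / np.sum(Otest))
print('min test RMSE over checkpoints: %f (at iter %d)' % (np.min(test_rmse), np.argmin(test_rmse) * num_iter_test))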
In [ ]:
# last X generated (norm_X from the most recent test evaluation)
plt.figure(figsize=(20,10))
plt.imshow(X)
plt.colorbar()