Load pandas; we are going to need it for manipulating the data.
In [1]:
import pandas as pd
import numpy as np
from IPython.display import Image
# Print NumPy floating-point values with 3 digits of precision.
np.set_printoptions(precision = 3)
Now load the data
In [87]:
# Load the per-user and per-item side-feature tables.
user_features_df = pd.read_csv("user_features.csv")
item_features_df = pd.read_csv("item_features.csv")

# Tag every row with its positional id and a constant join key, so that
# merging on "key" yields the full user x item cross product.
user_features_df["key"] = 0
user_features_df["user_id"] = np.arange(user_features_df.shape[0])
item_features_df["key"] = 0
item_features_df["item_id"] = np.arange(item_features_df.shape[0])

# pd.merge rejects `on` combined with `left_index`; joining on the constant
# key alone is what produces one row per (user, item) pair.
merged_df = pd.merge(user_features_df, item_features_df, on="key")

# Look up the rating of every (user, item) pair in one indexing operation.
# NOTE(review): `user_ratings_df` is not defined in this cell; it is assumed
# to be the users x items rating table (other cells call it `data`) - confirm.
merged_df["rating"] = user_ratings_df.values[
    merged_df["user_id"].values, merged_df["item_id"].values
]

# Rows with every field present form the training set; rows with any missing
# value form the test set.  The frames are named *_df so they cannot
# overwrite (or be overwritten by) the `train` function defined further down.
train_df = merged_df.dropna()
test_df = merged_df[merged_df.isnull().any(axis=1)]
print(test_df.to_latex())
In [69]:
# Size of each latent vector.
n_latent_features = 2

# NOTE(review): `data` is not defined in the visible cells; it is assumed to
# be the users x items rating DataFrame - confirm.
user_ratings = data.values

# Random starting point for the latent factors: one row per user (rows of
# user_ratings) and one row per item (columns of user_ratings).
latent_user_preferences = np.random.random((user_ratings.shape[0], n_latent_features))
latent_item_features = np.random.random((user_ratings.shape[1], n_latent_features))

# The data-loading cell adds "key" / "user_id" / "item_id" bookkeeping columns
# to the feature frames.  They are identifiers, not features, so they are
# filtered out here; frames without those columns pass through unchanged.
user_features = user_features_df[
    [col for col in user_features_df.columns if col not in ("key", "user_id")]
].values
item_features = item_features_df[
    [col for col in item_features_df.columns if col not in ("key", "item_id")]
].values
print(item_features_df.to_latex())

# Prepend a column of ones so the first weight of every row acts as a bias.
user_features = np.concatenate(
    [np.ones(shape=(user_features.shape[0], 1)), user_features], axis=1
)
item_features = np.concatenate(
    [np.ones(shape=(item_features.shape[0], 1)), item_features], axis=1
)

# One weight vector per user / per item, matching the widened feature rows.
user_features_weights = np.random.random((user_ratings.shape[0], user_features.shape[1]))
item_features_weights = np.random.random((user_ratings.shape[1], item_features.shape[1]))
In [10]:
def predict_rating(user_id, item_id):
    """Return the model's rating estimate for one (user, item) pair.

    The estimate is the sum of three terms, all read from module-level
    arrays indexed by ``user_id`` / ``item_id``:

    * the dot product of the user's and the item's latent vectors,
    * the user's feature weights dotted with the user's feature row,
    * the item's feature weights dotted with the item's feature row.
    """
    latent_term = latent_user_preferences[user_id].dot(latent_item_features[item_id])
    user_term = user_features_weights[user_id].dot(user_features[user_id])
    item_term = item_features_weights[item_id].dot(item_features[item_id])
    return latent_term + user_term + item_term
def train(user_id, item_id, rating, alpha=0.001,
          latent_feature_weight_decay=0.1,
          user_weight_decay=0.01,
          item_weight_decay=0.0001):
    """Apply one gradient step for a single observed rating.

    Updates, in place, the module-level latent vectors and feature weights
    of the given user and item.

    Args:
        user_id: Row index of the user.
        item_id: Row index of the item.
        rating: Observed rating for this (user, item) pair.
        alpha: Learning rate.
        latent_feature_weight_decay: Decay factor applied to both latent
            vectors.
        user_weight_decay: Decay factor applied to the user's feature weights.
        item_weight_decay: Decay factor applied to the item's feature weights.

    Returns:
        The prediction error (prediction minus ``rating``) measured before
        the update.
    """
    err = predict_rating(user_id, item_id) - rating

    # A real copy is required: basic slicing ([:]) on a NumPy array returns a
    # view, which would already hold the updated user vector by the time the
    # item vector is updated below.
    old_user_preferences = latent_user_preferences[user_id].copy()

    # NOTE(review): the decay terms are scaled by `err` as well; in the usual
    # L2-regularised gradient only the data term is.  Left as in the original
    # pending confirmation.
    latent_user_preferences[user_id] -= alpha * err * (
        latent_item_features[item_id]
        + latent_feature_weight_decay * latent_user_preferences[user_id]
    )
    latent_item_features[item_id] -= alpha * err * (
        old_user_preferences
        + latent_feature_weight_decay * latent_item_features[item_id]
    )

    user_features_weights[user_id] -= alpha * err * (
        user_features[user_id]
        + user_weight_decay * user_features_weights[user_id]
    )
    # The gradient w.r.t. the item weights is the item's feature row (not the
    # weights themselves), mirroring the user-weight update above.
    item_features_weights[item_id] -= alpha * err * (
        item_features[item_id]
        + item_weight_decay * item_features_weights[item_id]
    )
    return err
def sgd(iterations=30000):
    """Run stochastic gradient descent over every observed rating.

    Each iteration visits all (user, item) pairs, calls ``train`` on those
    whose entry in ``user_ratings`` is not NaN, and prints the mean squared
    error of that pass.

    Args:
        iterations: Number of full passes over the rating matrix.
    """
    n_users = latent_user_preferences.shape[0]
    n_items = latent_item_features.shape[0]

    for _ in range(iterations):
        errors = []
        for user_id in range(n_users):
            for item_id in range(n_items):
                rating = user_ratings[user_id][item_id]
                if np.isnan(rating):
                    # Missing rating: nothing to learn from this pair.
                    continue
                errors.append(train(user_id, item_id, rating))

        # With no observed ratings the mean of an empty array is NaN and
        # triggers a RuntimeWarning; report NaN directly instead.
        mse = np.mean(np.square(errors)) if errors else float("nan")
        print(mse)
In [11]:
# Ten back-to-back training runs, each using sgd()'s default iteration count.
for _ in range(10):
    sgd()
In [6]:
# One cell per (user, item) pair, filled in below.
predictions = np.zeros(
    shape=(latent_user_preferences.shape[0], latent_item_features.shape[0])
)

# Dump the learned feature weights for inspection.
print(user_features_weights)
print(item_features_weights)

# Predict a rating for every pair, including those without an observed rating.
for user_id in range(predictions.shape[0]):
    for item_id in range(predictions.shape[1]):
        predictions[user_id, item_id] = predict_rating(user_id, item_id)
In [7]:
# Pair every actual rating with its prediction, one list of (actual,
# predicted) tuples per user.  list(...) materialises the pairs because on
# Python 3 zip() returns a one-shot iterator rather than a list of tuples.
values = [
    list(zip(user_ratings[i], predictions[i]))
    for i in range(predictions.shape[0])
]
comparison_data = pd.DataFrame(values)

# NOTE(review): `data` is not defined in the visible cells; assumed to be the
# rating DataFrame whose columns label the items - confirm.
comparison_data.columns = data.columns

# Render each pair as "(actual|predicted)".  The result is not stored; it is
# only the value of this expression, so comparison_data keeps the raw tuples.
comparison_data.applymap(lambda pair: "(%2.3f|%2.3f)" % (pair[0], pair[1]))
Out[7]:
In [8]:
# Show the unformatted table of (actual, predicted) pairs as the cell output.
comparison_data
Out[8]:
In [9]:
# Export the comparison table as LaTeX source.  The `with` block closes the
# file even if the write raises.
d = comparison_data.to_latex()
with open("comparison.txt", "w") as text_file:
    text_file.write(d)
In [ ]:
In [ ]:
In [ ]:
In [ ]: