In [1]:
import os
os.chdir('..')
In [2]:
# Import all the packages we need to generate recommendations
import numpy as np
import pandas as pd
import src.utils as utils
import src.recommenders as recommenders
import src.similarity as similarity
# Enable logging in the Jupyter notebook
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
In [3]:
# imports necessary for plotting
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
Here we create some fake data with 3 users and 4 items; note that some of the ratings are missing, which is common in a real recommender system.
In [4]:
data = [[1, 1, 5], [1, 2, 4], [1, 3, 3],
        [2, 1, 3], [2, 2, 4], [2, 3, 5], [2, 4, 2],
        [3, 2, 2], [3, 3, 4], [3, 4, 4]]
ratings = pd.DataFrame(columns=["customer", "movie", "rating"], data=data)
ratings
Out[4]:
In [5]:
# the data is stored in a long pandas dataframe
# we need to pivot the data to create a [user x movie] matrix
ratings_matrix = ratings.pivot_table(index='customer', columns='movie', values='rating', fill_value=0)
ratings_matrix
Out[5]:
In [6]:
target_customer = 3
similarity_metric = "cosine"
K = 10
In [7]:
# get the nearest neighbours of the target customer, ranked by similarity
# take entries [1:K+1] to skip index 0, which is the target customer itself (similarity 1)
neighbours = similarity.compute_nearest_neighbours(target_customer, ratings_matrix, similarity_metric)[1:K+1]
neighbours
Out[7]:
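The implementation of compute_nearest_neighbours lives in src/similarity.py and is not shown in this notebook. As a rough sketch only (not the actual implementation), assuming it returns a dataframe with item and similarity columns sorted by decreasing cosine similarity, it could look like this:
import numpy as np
import pandas as pd

def cosine_nearest_neighbours_sketch(target, matrix):
    # rough sketch only: the real logic lives in src/similarity.py;
    # the column names "item" and "similarity" are inferred from how
    # the result is used in the cells below
    target_vector = matrix.loc[target].values.astype(float)
    rows = []
    for customer, row in matrix.iterrows():
        vector = row.values.astype(float)
        denom = np.linalg.norm(target_vector) * np.linalg.norm(vector)
        sim = np.dot(target_vector, vector) / denom if denom else 0.0
        rows.append({"item": customer, "similarity": sim})
    result = pd.DataFrame(rows)
    # the target itself has similarity 1 and ends up at index 0,
    # which is why the cell above slices [1:K+1]
    return result.sort_values("similarity", ascending=False).reset_index(drop=True)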
In [8]:
recommendations = {}
simSums = {}
supportRatings = {}
# Iterate through the k nearest neighbours, accumulating their similarity-weighted ratings
for neighbour in neighbours.item.unique():
    weight = neighbours.similarity[neighbours.item == neighbour]
    neighbour_ratings = ratings.loc[ratings.customer == neighbour]
    # accumulate the weighted rating contribution for each movie the neighbour rated
    for movie in neighbour_ratings.movie.unique():
        prediction = neighbour_ratings.rating[neighbour_ratings.movie == movie] * weight.values[0]
        # if this is the first time we see this movie, initialise its accumulators to 0
        recommendations.setdefault(movie, 0)
        simSums.setdefault(movie, 0)
        supportRatings.setdefault(movie, 0)
        recommendations[movie] += prediction.values[0]
        simSums[movie] += weight.values[0]
        supportRatings[movie] += 1
In [9]:
recommendations
Out[9]:
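The recommendations dictionary now holds, for each movie, the similarity-weighted sum of the neighbours' ratings. Dividing by the accumulated similarities turns this into a weighted average, which is the prediction computed in the next cell:

$$\hat{r}_{u,i} = \frac{\sum_{v \in N_k(u)} \mathrm{sim}(u, v)\, r_{v,i}}{\sum_{v \in N_k(u)} \mathrm{sim}(u, v)}$$

where $N_k(u)$ is the set of nearest neighbours of the target customer $u$ that have rated movie $i$.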
In [10]:
# normalise by the sum of similarities so each score is a weighted average of the neighbours' ratings
recs_normalized = [(score/simSums[movie], movie) for movie, score in recommendations.items()]
recs_normalized
Out[10]:
In [11]:
# as above, but additionally suppress movies rated by fewer than `threshold` neighbours
threshold = 2
recs_normalized = [(score/simSums[movie]*min(supportRatings[movie]-(threshold-1), 1), movie) for movie, score in recommendations.items()]
recs_normalized
Out[11]:
In [12]:
# for example, the recommendation for item 1 is calculated as
rec_item_1 = (0.471405*5 + 0.816497*3)/(0.816497+0.471405)
rec_item_1
Out[12]:
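Assuming the similarity values used in the hand calculation above match the neighbours shown in Out[7], this result should agree with the entry for movie 1 in recs_normalized; a quick check:
# hypothetical sanity check: pick out the normalised score for movie 1
[score for score, movie in recs_normalized if movie == 1]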