In [6]:
# This line configures matplotlib to show figures embedded in the notebook,
# instead of opening a new window for each figure.
# If you are using an old version of IPython, try using '%pylab inline' instead.
%matplotlib inline
In [7]:
# import libraries
from pylab import *
import numpy as np
In [8]:
# Sample 10 evenly spaced points on the closed interval [0, 5]
x = np.linspace(0, 5, num=10)
# Evaluate the cubic y = x^3 element-wise at every sample point
y = np.power(x, 3)
In [9]:
# Draw the curve as a red line on a new figure, label it, and display it.
fig = figure()
ax = fig.gca()  # the same current-axes object the pyplot-level calls act on
ax.plot(x, y, 'r')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('title')
show()
In [1]:
# Sample data: customer name -> {movie title -> rating}.
# Each row of scores lists the ratings in the same order as the titles tuple.
ratings = {
    customer: dict(zip(
        ('Frozen', 'Goodfellas', 'Life of Pi', 'Minority Report', 'Wedding Crashers'),
        scores,
    ))
    for customer, scores in (
        ('Wilson Mcgee',      (2, 4, 3, 5, 1)),
        ('Shelley Beck',      (4, 3, 2, 3, 5)),
        ('Darrell Wheeler',   (5, 3, 1, 2, 3)),
        ('May White',         (1, 3, 4, 5, 2)),
        ('Jacob Walton',      (4, 2, 3, 1, 5)),
        ('Alejandro Sanchez', (2, 4, 3, 5, 1)),
    )
}
In [8]:
from math import sqrt
def similarity_score(ratings, customer1, customer2):
    """Return the Pearson Correlation Coefficient (PCC) of two customers.

    Only items that both customers have rated contribute to the score.

    Parameters
    ----------
    ratings : mapping
        Maps each customer to a mapping of item -> numeric rating.
    customer1, customer2 : hashable
        Keys of ``ratings`` naming the two customers to compare.

    Returns
    -------
    number
        The PCC, bounded to [-1, 1]. Returns 0 when the customers share
        no rated items, or when either customer's shared ratings have no
        spread (the coefficient is undefined in that case).

    Raises
    ------
    KeyError
        If ``customer1`` or ``customer2`` is not a key of ``ratings``.
    """
    first = ratings[customer1]
    second = ratings[customer2]

    # Items rated by both customers, in customer1's iteration order.
    shared = [item for item in first if item in second]
    if not shared:
        return 0

    # A float count makes every "/ n" below a true division, so the means
    # are not truncated when the ratings are ints on Python 2.
    n = float(len(shared))

    xs = [first[item] for item in shared]
    ys = [second[item] for item in shared]

    # Real lists (not generators) are handed to sum() on purpose: after
    # `from pylab import *`, `sum` is numpy's, which expects a sequence.
    sum1 = sum(xs)
    sum2 = sum(ys)
    sum1_squares = sum([value ** 2 for value in xs])
    sum2_squares = sum([value ** 2 for value in ys])
    sum_products = sum([a * b for a, b in zip(xs, ys)])

    numerator = sum_products - (sum1 * sum2) / n
    spread1 = sum1_squares - (sum1 ** 2) / n
    spread2 = sum2_squares - (sum2 ** 2) / n

    # Floating-point rounding can turn a zero spread into a tiny negative
    # number; treating it as "no spread" avoids sqrt() raising ValueError.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    denominator = sqrt(spread1 * spread2)
    if denominator == 0:
        # The product of two tiny spreads can underflow to zero.
        return 0

    score = numerator / denominator
    # Mathematically the PCC lies in [-1, 1]; trim any rounding overshoot.
    if score > 1.0:
        return 1.0
    if score < -1.0:
        return -1.0
    return score
In [9]:
# Score one pair of customers; the cell's last expression is shown as output.
customer1, customer2 = 'Wilson Mcgee', 'Alejandro Sanchez'
similarity_score(ratings, customer1, customer2)
Out[9]: