In [6]:
# This line configures matplotlib to show figures embedded in the notebook, 
# instead of opening a new window for each figure. 
# If you are using an old version of IPython, try using '%pylab inline' instead.
%matplotlib inline

In [7]:
# import libraries
from pylab import *
import numpy as np

In [8]:
# Sample the x axis: 10 evenly spaced points covering the closed interval [0, 5]
x = np.linspace(start=0, stop=5, num=10)
# Evaluate the function under study, the cube of each sample
y = np.power(x, 3)

In [9]:
# Plot the function y = x**3 using the pylab names star-imported above.
# The calls below all act on the current figure, so their order matters.
# Start a new figure
figure()
# Draw y against x; 'r' is the matplotlib format string for a red line
plot(x, y, 'r')
# Label both axes
xlabel('x')
ylabel('y')
# NOTE(review): 'title' is a placeholder caption; consider a descriptive one
title('title')
# Render the figure
show()



In [1]:
# Movie ratings keyed by customer name, then by movie title.
# Every customer below has rated the same five movies, with scores from 1 to 5.
ratings = {
    'Wilson Mcgee': {
        'Frozen': 2,
        'Goodfellas': 4,
        'Life of Pi': 3,
        'Minority Report': 5,
        'Wedding Crashers': 1,
    },
    'Shelley Beck': {
        'Frozen': 4,
        'Goodfellas': 3,
        'Life of Pi': 2,
        'Minority Report': 3,
        'Wedding Crashers': 5,
    },
    'Darrell Wheeler': {
        'Frozen': 5,
        'Goodfellas': 3,
        'Life of Pi': 1,
        'Minority Report': 2,
        'Wedding Crashers': 3,
    },
    'May White': {
        'Frozen': 1,
        'Goodfellas': 3,
        'Life of Pi': 4,
        'Minority Report': 5,
        'Wedding Crashers': 2,
    },
    'Jacob Walton': {
        'Frozen': 4,
        'Goodfellas': 2,
        'Life of Pi': 3,
        'Minority Report': 1,
        'Wedding Crashers': 5,
    },
    'Alejandro Sanchez': {
        'Frozen': 2,
        'Goodfellas': 4,
        'Life of Pi': 3,
        'Minority Report': 5,
        'Wedding Crashers': 1,
    },
}

In [8]:
from math import sqrt

# Return a similarity score for two customers 
# using the Pearson Correlation Coefficient (PCC)
def similarity_score(ratings, customer1, customer2):
    """Return the Pearson Correlation Coefficient (PCC) of two customers.

    Only the items rated by both customers take part in the computation.

    Args:
        ratings: Mapping of customer name -> {item: numeric rating}.
        customer1: Key in ``ratings`` identifying the first customer.
        customer2: Key in ``ratings`` identifying the second customer.

    Returns:
        The coefficient as a float (nominally between -1 and 1), or 0 when
        the customers share no rated items or when either customer's shared
        ratings have no spread (the coefficient is undefined in that case).

    Raises:
        KeyError: If a customer looked up in ``ratings`` is missing.
    """
    # Items that both customers have rated
    shared = [item for item in ratings[customer1] if item in ratings[customer2]]

    # No overlap in rated items: nothing to correlate
    if not shared:
        return 0

    # Divide by a float so the result does not depend on the interpreter's
    # integer-division rules when every rating is an int.
    n = float(len(shared))

    first = [ratings[customer1][item] for item in shared]
    second = [ratings[customer2][item] for item in shared]

    # Components needed to compute the PCC
    sum1 = sum(first)
    sum2 = sum(second)
    sum1_squares = sum(pow(value, 2) for value in first)
    sum2_squares = sum(pow(value, 2) for value in second)
    sum_products = sum(a * b for a, b in zip(first, second))

    numerator = sum_products - (sum1 * sum2) / n

    # Mathematically these terms are >= 0, but floating-point rounding can
    # push them slightly below zero when the ratings are (nearly) constant.
    # Clamp each one separately: a single negative term would make sqrt()
    # raise a domain error, and two negative terms would multiply into a
    # bogus positive denominator.
    spread1 = max(sum1_squares - pow(sum1, 2) / n, 0.0)
    spread2 = max(sum2_squares - pow(sum2, 2) / n, 0.0)
    denominator = sqrt(spread1 * spread2)

    # Zero spread on either side: correlation is undefined, report no similarity
    if denominator == 0:
        return 0

    return numerator / denominator

In [9]:
# These two customers gave every movie the same rating, so the score should be 1.0
customer1, customer2 = 'Wilson Mcgee', 'Alejandro Sanchez'
similarity_score(ratings, customer1, customer2)


Out[9]:
1.0