In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
In [2]:
# Load the per-user ratings table and show its first row as a quick schema check.
ratings = pd.read_csv('ratings.csv')
ratings.head(n=1)
Out[2]:
In [3]:
# Load the movie metadata table and show its first row as a quick schema check.
movies = pd.read_csv('movies.csv')
movies.head(n=1)
Out[3]:
In [4]:
# Attach movie metadata to every rating. No `on=` is passed, so pandas uses its
# defaults: an inner join on every column name the two frames have in common
# (presumably the movie id column -- TODO confirm against the CSV headers).
ratings_df = movies.merge(ratings)
ratings_df.head()
Out[4]:
In [5]:
# Reshape the long ratings table into a user x title matrix of ratings
# (rows: user_id, columns: title). Duplicate (user, title) pairs are combined by
# pivot_table's default aggregation, the mean.
# Movies a user has not rated come out as NaN and are replaced by 0 so the
# similarity computations further down receive a fully numeric matrix.
ratings_mtx_df = (
    ratings_df
    .pivot_table(index='user_id', columns='title', values='rating')
    .fillna(0)
)
In [6]:
# Preview the first rows of the user x title rating matrix.
ratings_mtx_df.head()
Out[6]:
In [7]:
# Keep the column labels (movie titles) so that positions in the similarity
# matrices can be translated back to titles.
movie_index = ratings_mtx_df.columns
In [8]:
# Item-item cosine similarity: transpose so that each row is one movie's rating
# vector across all users, then compare every pair of rows.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in pandas 0.23
# and removed in pandas 1.0 (it raises AttributeError on current installs).
corr_matrix = cosine_similarity(ratings_mtx_df.T.values)
# Zero the diagonal so a movie's similarity to itself never shows up as its own
# best match when rows are sorted or thresholded later.
np.fill_diagonal(corr_matrix, 0)
# Labels are positional: entry (i, j) compares ratings_mtx_df.columns[i] with
# ratings_mtx_df.columns[j].
corr = pd.DataFrame(corr_matrix)
In [9]:
# Preview the first rows of the movie-by-movie cosine similarity table.
corr.head()
Out[9]:
In [10]:
# Integer position of the query movie among the matrix columns.
# Index.get_loc looks the label up directly instead of copying the whole index
# into a Python list and scanning it; a title that is not present raises KeyError.
inp = movie_index.get_loc('Inception (2010)')
inp
Out[10]:
In [11]:
# Cosine similarity of every movie to the query movie: row `inp` of the matrix
# (the query movie's own entry is 0 because the diagonal was zeroed).
P = corr_matrix[inp]
In [12]:
# Highest similarity score in the query movie's row.
# ndarray.max is vectorised and propagates NaN deterministically; the builtin
# max() loops element by element in Python and gives an order-dependent answer
# if any NaN is present.
P.max()
Out[12]:
In [13]:
# Titles whose cosine similarity to the query movie lies strictly between 0.5 and 0.7.
movie_index[np.logical_and(P > 0.5, P < 0.7)].tolist()
Out[13]:
In [14]:
# Second similarity measure: Pearson correlation between movies. Transposing
# puts one movie per row, which is the layout np.corrcoef treats as one variable
# per row by default.
# NOTE: this rebinds corr_matrix and corr, replacing the cosine results above.
corr_matrix = np.corrcoef(ratings_mtx_df.T)
# Zero out each movie's correlation with itself, as was done for the cosine matrix.
corr_matrix[np.diag_indices_from(corr_matrix)] = 0
corr = pd.DataFrame(data=corr_matrix)
In [15]:
# Preview the first rows of the movie-by-movie Pearson correlation table.
corr.head()
Out[15]:
In [16]:
# Pearson correlation of every movie with the query movie: row `inp` of the
# matrix (the query movie's own entry is 0 because the diagonal was zeroed).
P = corr_matrix[inp]
In [17]:
# Highest correlation in the query movie's row.
# ndarray.max is vectorised and propagates NaN deterministically; the builtin
# max() loops element by element in Python and gives an order-dependent answer
# if any NaN is present (np.corrcoef yields NaN for zero-variance rows).
P.max()
Out[17]:
In [18]:
# Titles whose Pearson correlation with the query movie lies strictly between 0.45 and 0.6.
movie_index[np.logical_and(P > 0.45, P < 0.6)].tolist()
Out[18]: