In [38]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
In [39]:
# Column names assigned to the '::'-delimited data files. Because `names=` is
# passed, pandas treats the first line of each file as data, not as a header.
u_cols = ['user_id', 'sex', 'age', 'occupation', 'zip_code']
r_cols = ['user_id', 'movie_id', 'rating']
m_cols = ['movie_id', 'title', 'genres']

# engine='python' is required because '::' is a multi-character separator.
# encoding='latin-1' prevents a UnicodeDecodeError on Python 3 when a file holds
# non-UTF-8 bytes; ASCII content decodes identically.
# NOTE(review): these look like the MovieLens 1M files, whose movie titles
# contain Latin-1 accented characters -- confirm against the data source.
users = pd.read_csv('users.dat', sep='::', names=u_cols,
                    engine='python', encoding='latin-1')
# usecols=range(3) keeps only the first three columns of ratings.dat.
ratings = pd.read_csv('ratings.dat', sep='::', names=r_cols,
                      engine='python', usecols=range(3), encoding='latin-1')
movies = pd.read_csv('movies.dat', sep='::', names=m_cols,
                     engine='python', encoding='latin-1')
In [40]:
# Sanity check: show the first parsed row of the ratings table.
ratings.head(n=1)
Out[40]:
In [41]:
# Sanity check: show the first parsed row of the movies table.
movies.head(n=1)
Out[41]:
In [42]:
# Attach title/genres to every rating. 'movie_id' is the only column the two
# frames share, so naming it explicitly matches the inferred join key; the
# join type is pandas' default inner join.
ratings_df = movies.merge(ratings, how='inner', on='movie_id')
ratings_df.head(n=5)
Out[42]:
In [43]:
# User x title rating matrix: one row per user_id, one column per movie title.
# pivot_table averages duplicate (user, title) pairs by default; any pair with
# no rating becomes NaN and is then replaced by 0.
ratings_mtx_df = (
    ratings_df
    .pivot_table(values='rating', index='user_id', columns='title')
    .fillna(0)
)
In [44]:
# Show the first five users of the zero-filled user x title matrix.
ratings_mtx_df.head(n=5)
Out[44]:
In [45]:
# Column labels of the rating matrix (movie titles). Used to translate between
# a title and its row/column position in the similarity matrices.
movie_index = ratings_mtx_df.columns
In [46]:
# Item-item cosine similarity. The matrix is transposed so each row is one
# movie's rating vector across all users.
# DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the same
# ndarray and works on both old and new pandas versions.
# Despite the name, `corr_matrix` holds cosine similarities in this cell.
corr_matrix = cosine_similarity(ratings_mtx_df.T.values)
# Zero the diagonal so a movie's similarity with itself does not dominate
# later max / sorting / threshold lookups.
np.fill_diagonal(corr_matrix, 0)
corr = pd.DataFrame(corr_matrix)
In [47]:
# Show the first five rows of the similarity matrix (rows/columns are
# positional, in the same order as the rating-matrix columns).
corr.head(n=5)
Out[47]:
In [48]:
# Positional index of the query movie among the title columns.
# Index.get_loc performs a direct lookup instead of copying the whole index
# into a Python list and scanning it linearly. It returns a plain integer
# because pivot_table produces unique column labels.
# A missing title raises KeyError (the list-based lookup raised ValueError).
inp = movie_index.get_loc('Final Destination (2000)')
inp
Out[48]:
In [49]:
# Similarity of the query movie to every movie (one full row of the matrix).
P = corr_matrix[inp, :]
In [50]:
# Highest similarity to the query movie. ndarray.max() is vectorised, whereas
# the builtin max() iterates the NumPy array element by element in Python.
P.max()
Out[50]:
In [51]:
# Titles whose similarity to the query movie lies strictly between 0.34 and 0.41.
movie_index[(0.34 < P) & (P < 0.41)].tolist()
Out[51]:
In [52]:
# Item-item Pearson correlation. Each row of the transposed matrix is one
# movie's rating vector, which np.corrcoef treats as one variable.
# This rebinds `corr_matrix` and `corr`, replacing the values computed earlier.
corr_matrix = np.corrcoef(ratings_mtx_df.T.values)
# Zero the diagonal so self-correlation does not dominate later lookups.
np.fill_diagonal(corr_matrix, 0)
corr = pd.DataFrame(data=corr_matrix)
In [53]:
# Show the first five rows of the correlation matrix (rows/columns are
# positional, in the same order as the rating-matrix columns).
corr.head(n=5)
Out[53]:
In [54]:
# Correlation of the query movie with every movie (one full row of the matrix).
P = corr_matrix[inp, :]
In [55]:
# Highest correlation with the query movie. ndarray.max() is vectorised, whereas
# the builtin max() iterates the NumPy array element by element in Python.
P.max()
Out[55]:
In [56]:
# Titles whose correlation with the query movie lies strictly between 0.28 and 0.35.
movie_index[(0.28 < P) & (P < 0.35)].tolist()
Out[56]: