In [29]:
import numpy as np
import pandas as pd
In [30]:
# Column names to assign, since the file is read without a header row.
m_cols = ['movie_id', 'title', 'genres']
# A multi-character separator such as '::' needs the Python parsing engine.
movies_df = pd.read_csv(
    'movies.dat',
    sep='::',
    names=m_cols,
    engine='python',
)
# Force titles to plain strings so the .str accessor can be used on them later.
movies_df['title'] = movies_df['title'].astype(str)
In [31]:
# Expand the pipe-separated `genres` string into one 0/1 indicator column per genre.
genre_dummies = movies_df['genres'].str.get_dummies(sep='|')
# Rebuild from the original m_cols columns only, so that re-running this cell
# replaces the indicator columns instead of appending a second copy of them.
# (Any derived column added by a later cell is dropped and must be recomputed.)
movies_df = pd.concat([movies_df[m_cols], genre_dummies], axis=1)
movies_df.head(3)
Out[31]:
In [32]:
# Genre indicator columns are selected by name rather than by position:
# everything that is neither one of the original m_cols columns nor the derived
# 'score' column. A positional slice would silently start including 'score'
# if this cell were re-run after the scoring cell.
non_genre_columns = set(m_cols) | {'score'}
movies_category = pd.Index(
    [column for column in movies_df.columns if column not in non_genre_columns]
)
movies_category
Out[32]:
In [33]:
def dot_product(vector1, vector2):
    """Return the dot product of two equal-length numeric sequences.

    Parameters
    ----------
    vector1, vector2 : iterable of numbers
        Any iterables (lists, tuples, pandas Series, dict views, iterators).
        They are paired element by element in iteration order.

    Returns
    -------
    The sum of the pairwise products; 0 when both inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
        Without this check zip() would silently drop the surplus elements
        and return a misleading result.
    """
    # Materialise both inputs so one-shot iterators and dict views can be measured.
    left = list(vector1)
    right = list(vector2)
    if len(left) != len(right):
        raise ValueError(
            'dot_product needs vectors of equal length, got %d and %d'
            % (len(left), len(right))
        )
    return sum(a * b for a, b in zip(left, right))
def movie_score(movie_features, user_preferences):
    """Score a movie for a user.

    The score is the dot product of the movie's feature vector with the
    user's preference vector; both are handed to dot_product unchanged.
    """
    score = dot_product(movie_features, user_preferences)
    return score
In [34]:
# Show every row whose title contains 'Shawshank'.
movies_df.loc[movies_df['title'].str.contains('Shawshank')]
Out[34]:
In [35]:
# Genre indicator vector of the movie stored under index label 315.
# NOTE(review): presumably this is the row found by the 'Shawshank' search in
# the previous cell, although the variable name suggests a different film;
# confirm against that cell's output.
# Columns are selected by label (movies_category) rather than with a positional
# [3:] slice, so a later-added column such as 'score' can never leak into the vector.
eternal = movies_df.loc[315, movies_category]
eternal
Out[35]:
In [44]:
from collections import OrderedDict

# Per-genre preference weights for the example user: 1 = likes the genre,
# 0 = indifferent. The mapping is written as one ordered literal so every knob
# is visible in one place and the cell no longer depends on any other cell.
# (The previous OrderedDict(zip(movies_category, [])) zipped against an empty
# list and therefore always produced an empty mapping.)
user_preferences = OrderedDict([
    ('Action', 0),
    ('Adventure', 0),
    ('Animation', 0),
    ("Children's", 0),
    ('Comedy', 0),
    ('Crime', 0),
    ('Documentary', 0),
    ('Drama', 0),
    ('Fantasy', 0),
    ('Film-Noir', 0),
    ('Horror', 1),
    ('Musical', 0),
    ('Mystery', 1),
    ('Romance', 0),
    ('Sci-Fi', 0),
    ('Thriller', 1),
    ('War', 0),
    ('Western', 0),
])
In [40]:
# Predicted score of the single example movie for this user.
# Both vectors are built in movies_category order by genre label, so each
# weight is guaranteed to multiply its own genre's indicator instead of
# relying on the dict's insertion order happening to match the column order.
# Genres with no stated preference contribute a weight of 0.
eternal_user_predicted_score = dot_product(
    eternal[movies_category],
    [user_preferences.get(genre, 0) for genre in movies_category],
)
eternal_user_predicted_score
Out[40]:
In [45]:
# Build the preference vector in the same order as the genre columns, looking
# each weight up by genre label. Genres with no stated preference get weight 0.
# This removes the hidden requirement that user_preferences was filled in
# exactly the same order as the DataFrame's genre columns.
preference_vector = [user_preferences.get(genre, 0) for genre in movies_category]

# Score every movie row against the preference vector (args must be a tuple of
# the extra positional arguments handed to movie_score).
movies_df['score'] = movies_df[movies_category].apply(
    movie_score, args=(preference_vector,), axis=1
)

# Titles of the ten highest-scoring movies.
movies_df.sort_values(by='score', ascending=False)['title'].head(10)
Out[45]: