In [128]:
import numpy as np
import pandas as pd
In [129]:
# Load the movie catalogue from a CSV file in the working directory.
MOVIES_CSV_PATH = 'movies.csv'
movies_df = pd.read_csv(MOVIES_CSV_PATH)
In [130]:
# Split the pipe-separated `genres` string into one 0/1 indicator column per
# genre, then append those columns to the right of the existing ones.
genre_dummies = movies_df['genres'].str.get_dummies(sep='|')
movies_df = pd.concat([movies_df, genre_dummies], axis=1)
movies_df.head(3)
Out[130]:
In [131]:
# Remove the two indicator columns that are not used as genres for scoring.
movies_df = movies_df.drop(columns=['(no genres listed)', 'IMAX'])
In [132]:
# Everything after the leading non-genre columns is treated as a genre
# indicator. NOTE(review): assumes the CSV contributes exactly three columns
# before the appended genre indicators - confirm against movies.csv.
N_LEADING_COLUMNS = 3
movies_category = movies_df.columns[N_LEADING_COLUMNS:]
movies_category
Out[132]:
In [133]:
def dot_product(vector1, vector2):
    """Return the dot product of two equal-length numeric iterables.

    Parameters
    ----------
    vector1, vector2 : iterable of numbers
        Any iterables (lists, tuples, pandas Series, dict views, generators).
        Both are materialised once so their lengths can be compared.

    Returns
    -------
    number
        Sum of the element-wise products; 0 when both inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of elements. Without
        this check ``zip`` would silently stop at the shorter input and
        return a score computed from misaligned or incomplete vectors.
    """
    left = list(vector1)
    right = list(vector2)
    if len(left) != len(right):
        raise ValueError(
            "dot_product requires vectors of equal length, got "
            "{} and {}".format(len(left), len(right))
        )
    # Generator expression: no temporary list is built just to be summed.
    return sum(a * b for a, b in zip(left, right))
def movie_score(movie_features, user_preferences):
    """Score a movie for a user.

    The score is the dot product of the movie's feature vector and the
    user's preference weights; both are passed straight to ``dot_product``
    in that order.
    """
    score = dot_product(movie_features, user_preferences)
    return score
In [134]:
# Show every movie whose title contains the text 'Rubber'.
title_matches = movies_df['title'].str.contains('Rubber')
movies_df[title_matches]
Out[134]:
In [135]:
# Genre indicator vector for the movie stored under index label 7648.
# NOTE(review): 7648 is a hard-coded row label, presumably read off a title
# search result - confirm it still points at the intended movie.
# Selecting with `movies_category` (instead of slicing the row with [3:])
# keeps this vector aligned with the columns used for scoring even if more
# columns are appended to movies_df later, and yields a numeric Series rather
# than an object-dtype slice of a mixed row.
eternal = movies_df.loc[7648, movies_category]
eternal
Out[135]:
In [136]:
from collections import OrderedDict

# Genres this user likes get weight 1; every other genre gets weight 0.
liked_genres = {'Horror', 'Thriller'}

# Fail early if a liked genre is not one of the genre columns; otherwise the
# preference would be dropped without any warning.
assert liked_genres <= set(movies_category), (
    "liked genres missing from movies_category: "
    + ", ".join(sorted(liked_genres - set(movies_category)))
)

# Build the preference vector by walking `movies_category` itself, so the
# weights are in exactly the same order (and use exactly the same names) as
# the genre columns they are multiplied with. Scoring is positional, so a
# hand-typed key list would have to match the column order by luck.
# (Zipping movies_category with an empty list yields no pairs, so it cannot
# be used to seed the keys.)
user_preferences = OrderedDict(
    (genre, 1 if genre in liked_genres else 0) for genre in movies_category
)
In [137]:
# Predicted score of the selected movie for this user: the movie's genre
# vector combined with the user's preference weights.
eternal_user_predicted_score = movie_score(eternal, user_preferences.values())
eternal_user_predicted_score
Out[137]:
In [138]:
# Score every movie at once: a matrix-vector product of the genre indicator
# columns with the preference weights. This is the vectorised equivalent of
# calling movie_score on each row, without a Python-level loop over rows.
# The weights are passed as a plain list so the product is positional (no
# label alignment); DataFrame.dot raises if the number of weights differs
# from the number of genre columns.
preference_weights = list(user_preferences.values())
movies_df['score'] = movies_df[movies_category].dot(preference_weights)

# Titles of the ten highest-scoring movies.
movies_df.sort_values(by='score', ascending=False)['title'].head(10)
Out[138]: