In [128]:
import numpy as np
import pandas as pd

In [129]:
# Load the movie catalogue from 'movies.csv' (path is relative to the
# notebook's working directory).
movies_df = pd.read_csv(filepath_or_buffer='movies.csv')

In [130]:
# One-hot encode the pipe-separated `genres` string: one 0/1 column per genre.
genre_dummies = movies_df['genres'].str.get_dummies(sep='|')

# Drop any genre columns left over from a previous run of this cell before
# concatenating, so re-running the cell does not append duplicate columns.
movies_df = pd.concat(
    [movies_df.drop(columns=genre_dummies.columns, errors='ignore'), genre_dummies],
    axis=1,
)
movies_df.head(3)


Out[130]:
movie_id title genres (no genres listed) Action Adventure Animation Children Comedy Crime ... Film-Noir Horror IMAX Musical Mystery Romance Sci-Fi Thriller War Western
0 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy 0 0 1 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
1 2 Jumanji (1995) Adventure|Children|Fantasy 0 0 1 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 3 Grumpier Old Men (1995) Comedy|Romance 0 0 0 0 0 1 0 ... 0 0 0 0 0 1 0 0 0 0

3 rows × 23 columns


In [131]:
# Remove the '(no genres listed)' and 'IMAX' indicator columns so they are not
# picked up as genre features by the cells below.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
movies_df = movies_df.drop(columns=['(no genres listed)', 'IMAX'], errors='ignore')

In [132]:
# Genre feature columns: everything after the first three columns
# (movie_id, title, genres in the output above).
# The derived 'score' column is appended to movies_df by the scoring cell at
# the end of the notebook; exclude it explicitly so re-running this cell after
# scoring does not turn 'score' into a "genre".
movies_category = movies_df.columns[3:].drop(['score'], errors='ignore')
movies_category


Out[132]:
Index(['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
       'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'],
      dtype='object')

In [133]:
def dot_product(vector1, vector2):
    """Return the sum of the pairwise products of two iterables.

    Elements are paired by position with ``zip``, so when the inputs differ in
    length the surplus elements of the longer one are silently ignored.
    The result is 0 when either input is empty.
    """
    total = 0
    for left, right in zip(vector1, vector2):
        total = total + left * right
    return total

def movie_score(movie_features, user_preferences):
    """Score one movie against a user's preference weights.

    The score is ``dot_product(movie_features, user_preferences)``; the two
    arguments are paired by position, so they are expected to list the same
    features in the same order.
    """
    score = dot_product(movie_features, user_preferences)
    return score

In [134]:
# Show the rows whose title contains 'Rubber'
# (str.contains interprets the pattern as a regular expression by default).
movies_df.loc[movies_df['title'].str.contains('Rubber')]


Out[134]:
movie_id title genres Action Adventure Animation Children Comedy Crime Documentary ... Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War Western
7648 81132 Rubber (2010) Action|Adventure|Comedy|Crime|Drama|Film-Noir|... 1 1 0 0 1 1 0 ... 0 1 1 0 1 0 0 1 0 1

1 rows × 21 columns


In [135]:
# Genre indicator vector for the row with index label 7648
# ("Rubber (2010)" in the lookup output above).
# Select the genre columns by name via `movies_category` instead of the
# positional slice [3:], which would also pick up the 'score' column once the
# scoring cell at the end of the notebook has been run.
# NOTE(review): the name `eternal` does not match the movie shown for this row;
# it is kept because later cells reference it.
eternal = movies_df.loc[7648, movies_category]
eternal


Out[135]:
Action         1
Adventure      1
Animation      0
Children       0
Comedy         1
Crime          1
Documentary    0
Drama          1
Fantasy        0
Film-Noir      1
Horror         1
Musical        0
Mystery        1
Romance        0
Sci-Fi         0
Thriller       1
War            0
Western        1
Name: 7648, dtype: object

In [136]:
from collections import OrderedDict

# Preference weight per genre, keyed and ordered exactly like `movies_category`
# so that the positional dot product pairs each weight with the right column.
# Every genre starts at 0 (no interest).
user_preferences = OrderedDict.fromkeys(movies_category, 0)

# Genres this user likes get weight 1.
user_preferences.update({'Horror': 1, 'Thriller': 1})

# A misspelled genre above would be appended as a new key and shift nothing
# visibly, so verify that the keys still match the feature columns one-to-one.
assert list(user_preferences) == list(movies_category), \
    "user_preferences keys must match movies_category in name and order"

In [137]:
# Predicted score of the single movie selected above for this user:
# its genre indicators weighted by the user's per-genre preferences.
eternal_user_predicted_score = movie_score(
    movie_features=eternal,
    user_preferences=user_preferences.values(),
)
eternal_user_predicted_score


Out[137]:
2

In [138]:
# Score every movie in one vectorised step: the (movies x genres) indicator
# matrix times the preference weight vector. This replaces a row-by-row
# `apply`, and unlike a zip-based dot product it raises if the number of
# weights does not match the number of genre columns.
# A dict view is not array-like, so materialise the weights as a list first.
preference_weights = list(user_preferences.values())
movies_df['score'] = movies_df[movies_category].dot(preference_weights)

# Titles of the ten highest-scoring movies.
movies_df.sort_values(by=['score'], ascending=False)['title'].head(10)


Out[138]:
3293                          Jaws: The Revenge (1987)
2393                              Guardian, The (1990)
7369                                     Saw VI (2009)
8755          Girl Walks Home Alone at Night, A (2014)
2386                                       Bats (1999)
8748                     Omen IV: The Awakening (1991)
4028                                   Blade II (2002)
6091    Attack of the Mushroom People (Matango) (1963)
4003                              Resident Evil (2002)
3976                          Trouble Every Day (2001)
Name: title, dtype: object