notebook.community

Edit and run



In [128]:

    
import numpy as np
import pandas as pd



In [129]:

    
# Load the movie catalogue from a CSV in the working directory.
# Later cells rely on its `title` and `genres` columns.
MOVIES_CSV = 'movies.csv'
movies_df = pd.read_csv(MOVIES_CSV)



In [130]:

    
# Build one 0/1 indicator column per genre from the pipe-separated `genres` string.
# The frame is rebuilt from the three descriptive columns rather than from whatever
# `movies_df` currently holds, so re-running this cell does not append a second,
# duplicate set of genre columns.
genre_dummies = movies_df['genres'].str.get_dummies(sep='|')
movies_df = pd.concat([movies_df[['movie_id', 'title', 'genres']], genre_dummies], axis=1)
movies_df.head(3)









    Out[130]:







  
    
      
      movie_id
      title
      genres
      (no genres listed)
      Action
      Adventure
      Animation
      Children
      Comedy
      Crime
      ...
      Film-Noir
      Horror
      IMAX
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
    
  
  
    
      0
      1
      Toy Story (1995)
      Adventure|Animation|Children|Comedy|Fantasy
      0
      0
      1
      1
      1
      1
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2
      Jumanji (1995)
      Adventure|Children|Fantasy
      0
      0
      1
      0
      1
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      3
      Grumpier Old Men (1995)
      Comedy|Romance
      0
      0
      0
      0
      0
      1
      0
      ...
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
    
  

3 rows × 23 columns



In [131]:

    
# Discard the two indicator columns that are not used as preference features.
# `errors='ignore'` keeps the cell re-runnable: once the columns are gone, a second
# execution is a no-op instead of raising KeyError.
movies_df = movies_df.drop(columns=['(no genres listed)', 'IMAX'], errors='ignore')



In [132]:

    
# Genre feature columns = every column except the descriptive ones.
# Excluding by name (instead of slicing `columns[3:]`) keeps the result stable if this
# cell is re-run after the `score` column has been added further down; column order
# is preserved, and `errors='ignore'` tolerates `score` not existing yet.
movies_category = movies_df.columns.drop(['movie_id', 'title', 'genres', 'score'], errors='ignore')
movies_category









    Out[132]:





Index(['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
       'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'],
      dtype='object')



In [133]:

    
def dot_product(vector1,vector2):
    """Return the sum of the element-wise products of two iterables.

    Elements are paired positionally with ``zip``, so when the inputs differ
    in length the trailing elements of the longer one are ignored.
    """
    paired = zip(vector1, vector2)
    return sum(left * right for left, right in paired)

def movie_score(movie_features,user_preferences):
    """Score one movie for a user.

    The score is the dot product of the movie's feature values with the
    user's preference weights, paired by position.
    """
    score = dot_product(movie_features, user_preferences)
    return score



In [134]:

    
# Show every row whose title contains "Rubber" (used to find the row label of the
# example movie scored below).
title_matches = movies_df['title'].str.contains('Rubber')
movies_df[title_matches]









    Out[134]:







  
    
      
      movie_id
      title
      genres
      Action
      Adventure
      Animation
      Children
      Comedy
      Crime
      Documentary
      ...
      Fantasy
      Film-Noir
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
    
  
  
    
      7648
      81132
      Rubber (2010)
      Action|Adventure|Comedy|Crime|Drama|Film-Noir|...
      1
      1
      0
      0
      1
      1
      0
      ...
      0
      1
      1
      0
      1
      0
      0
      1
      0
      1
    
  

1 rows × 21 columns



In [135]:

    
# Genre feature vector of the example movie. 7648 is the row label of "Rubber (2010)"
# returned by the title search above.
# Columns are selected by name via `movies_category` rather than by the positional
# slice `[3:]`, so the vector always lines up with the genre columns even after extra
# columns (such as `score`) are added to the frame.
eternal = movies_df.loc[7648, movies_category]
eternal









    Out[135]:





Action         1
Adventure      1
Animation      0
Children       0
Comedy         1
Crime          1
Documentary    0
Drama          1
Fantasy        0
Film-Noir      1
Horror         1
Musical        0
Mystery        1
Romance        0
Sci-Fi         0
Thriller       1
War            0
Western        1
Name: 7648, dtype: object



In [136]:

    
from collections import OrderedDict

# One weight per genre. Keys are taken from `movies_category` itself, in column order,
# so they cannot drift from the feature-column names (e.g. "Children's" vs 'Children');
# this matters because the scoring helper pairs weights with features by position.
user_preferences = OrderedDict.fromkeys(movies_category, 0)

# Genres this user likes; every other genre keeps weight 0.
# Change or add entries here to model a different taste profile.
user_preferences['Horror'] = 1
user_preferences['Thriller'] = 1

# A misspelled genre above would append a new key and break the positional alignment.
assert list(user_preferences) == list(movies_category), "preference keys must match genre columns"



In [137]:

    
# Predicted score for the example movie: the dot product of its genre features
# with the user's preference weights.
preference_weights = user_preferences.values()
eternal_user_predicted_score = dot_product(eternal, preference_weights)
eternal_user_predicted_score









    Out[137]:





2



In [138]:

    
# Score every movie in one vectorised matrix-vector product instead of a row-by-row
# Python `apply`. Weights are matched to the genre columns by position, and
# `DataFrame.dot` raises ValueError if the number of weights does not equal the number
# of genre columns (the zip-based helper would silently truncate instead).
preference_weights = np.asarray(list(user_preferences.values()))
movies_df['score'] = movies_df[movies_category].dot(preference_weights)

# Titles of the ten highest-scoring movies.
movies_df.sort_values(by='score', ascending=False)['title'].head(10)









    Out[138]:





3293                          Jaws: The Revenge (1987)
2393                              Guardian, The (1990)
7369                                     Saw VI (2009)
8755          Girl Walks Home Alone at Night, A (2014)
2386                                       Bats (1999)
8748                     Omen IV: The Awakening (1991)
4028                                   Blade II (2002)
6091    Attack of the Mushroom People (Matango) (1963)
4003                              Resident Evil (2002)
3976                          Trouble Every Day (2001)
Name: title, dtype: object

	movie_id	title	genres	Adventure	Animation	Children	Comedy	...	Romance
0	1	Toy Story (1995)	Adventure\|Animation\|Children\|Comedy\|Fantasy	1	1	1	1	...	0
1	2	Jumanji (1995)	Adventure\|Children\|Fantasy	1	0	1	0	...	0
2	3	Grumpier Old Men (1995)	Comedy\|Romance	0	0	0	1	...	1