notebook.community

Edit and run



In [29]:

    
import numpy as np
import pandas as pd



In [30]:

    
# Column names are supplied explicitly via `names=` for the '::'-delimited file.
m_cols = ['movie_id', 'title', 'genres']
# The two-character separator is why the Python parsing engine is requested.
movies_df = pd.read_csv(
    'movies.dat',
    sep='::',
    names=m_cols,
    engine='python',
)
# Store titles as plain strings; later cells use the .str accessor on them.
movies_df['title'] = movies_df['title'].astype(str)



In [31]:

    
# One-hot encode the pipe-delimited genres string into one 0/1 column per genre.
genre_flags = movies_df['genres'].str.get_dummies(sep='|')
# Rebuild from the base columns only, so re-running this cell replaces the
# indicator columns instead of appending a duplicate set each time.
movies_df = pd.concat([movies_df[m_cols], genre_flags], axis=1)
movies_df.head(3)









    Out[31]:







  
    
      
      movie_id
      title
      genres
      Action
      Adventure
      Animation
      Children's
      Comedy
      Crime
      Documentary
      ...
      Fantasy
      Film-Noir
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
    
  
  
    
      0
      1
      Toy Story (1995)
      Animation|Children's|Comedy
      0
      0
      1
      1
      1
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2
      Jumanji (1995)
      Adventure|Children's|Fantasy
      0
      1
      0
      1
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      3
      Grumpier Old Men (1995)
      Comedy|Romance
      0
      0
      0
      0
      1
      0
      0
      ...
      0
      0
      0
      0
      0
      1
      0
      0
      0
      0
    
  

3 rows × 21 columns



In [32]:

    
# The first three columns (movie_id, title, genres) are metadata; every
# column after them is a genre indicator.
metadata_column_count = 3
movies_category = movies_df.columns[metadata_column_count:]
movies_category









    Out[32]:





Index(['Action', 'Adventure', 'Animation', 'Children's', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
       'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'],
      dtype='object')



In [33]:

    
def dot_product(vector1, vector2):
    """Return the sum of the pairwise products of two iterables.

    Elements are paired with ``zip``, so when the inputs differ in length the
    trailing elements of the longer one are ignored. Two empty inputs give 0.
    """
    total = 0
    for left, right in zip(vector1, vector2):
        total = total + left * right
    return total

def movie_score(movie_features, user_preferences):
    """Score one movie for one user.

    The score is the dot product of the movie's feature values with the
    user's preference weights, paired position by position.
    """
    score = dot_product(movie_features, user_preferences)
    return score



In [34]:

    
# Show the row(s) whose title contains "Shawshank".
is_shawshank = movies_df['title'].str.contains('Shawshank')
movies_df[is_shawshank]









    Out[34]:







  
    
      
      movie_id
      title
      genres
      Action
      Adventure
      Animation
      Children's
      Comedy
      Crime
      Documentary
      ...
      Fantasy
      Film-Noir
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
    
  
  
    
      315
      318
      Shawshank Redemption, The (1994)
      Drama
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

1 rows × 21 columns



In [35]:

    
# Row label 315 is the Shawshank Redemption entry shown by the title lookup;
# skip its three leading metadata fields to keep only the genre indicators.
shawshank_row = movies_df.loc[315]
eternal = shawshank_row.iloc[3:]
eternal









    Out[35]:





Action         0
Adventure      0
Animation      0
Children's     0
Comedy         0
Crime          0
Documentary    0
Drama          1
Fantasy        0
Film-Noir      0
Horror         0
Musical        0
Mystery        0
Romance        0
Sci-Fi         0
Thriller       0
War            0
Western        0
Name: 315, dtype: object



In [44]:

    
from collections import OrderedDict

# Start every genre at weight 0, with keys in exactly the same order as the
# genre columns, so the weights line up position by position with a movie's
# genre indicators when the two are zipped together.
user_preferences = OrderedDict.fromkeys(movies_category, 0)

# This user likes horror, mystery and thriller movies.
for liked_genre in ('Horror', 'Mystery', 'Thriller'):
    # A misspelled genre would otherwise be appended as a new key and shift
    # the weights out of alignment with the genre columns.
    if liked_genre not in user_preferences:
        raise KeyError(liked_genre)
    user_preferences[liked_genre] = 1



In [40]:

    
# Pair the movie's genre indicators with the user's weights, in the
# preference dict's insertion order, and sum the products.
preference_weights = user_preferences.values()
eternal_user_predicted_score = dot_product(eternal, preference_weights)
eternal_user_predicted_score









    Out[40]:





0



In [45]:

    
# Score every movie: each row of genre indicators is passed to movie_score
# together with the user's weights. `args` takes a tuple of extra positional
# arguments, hence the one-element tuple with a trailing comma.
preference_weights = list(user_preferences.values())
movies_df['score'] = movies_df[movies_category].apply(
    movie_score, axis=1, args=(preference_weights,)
)
# Titles of the ten highest-scoring movies.
movies_df.sort_values(by='score', ascending=False)['title'].head(10)









    Out[45]:





3407                           Jacob's Ladder (1990)
2269    I Still Know What You Did Last Summer (1998)
1598          I Know What You Did Last Summer (1997)
3204                                 Scream 3 (2000)
1599                    Devil's Advocate, The (1997)
1070                        Dial M for Murder (1954)
1201                                   Psycho (1960)
3592                         Puppet Master II (1990)
2057                               Snake Eyes (1998)
2198                          Mortal Thoughts (1991)
Name: title, dtype: object

	movie_id	title	genres	Adventure	Animation	Children's	Comedy	...	Fantasy	Romance
0	1	Toy Story (1995)	Animation\|Children's\|Comedy	0	1	1	1	...	0	0
1	2	Jumanji (1995)	Adventure\|Children's\|Fantasy	1	0	1	0	...	1	0
2	3	Grumpier Old Men (1995)	Comedy\|Romance	0	0	0	1	...	0	1