notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np

def _read_dat(path):
    """Parse a '::'-delimited .dat file into a list of per-line field lists."""
    # The context manager guarantees the handle is closed; iterating the file
    # object avoids materialising every line with readlines() first.
    with open(path, 'r') as dat_file:
        return [line.strip().split("::") for line in dat_file]

ratings_list = _read_dat('ml-1m/ratings.dat')
users_list = _read_dat('ml-1m/users.dat')
movies_list = _read_dat('ml-1m/movies.dat')

# Ratings fields are all numeric text, so the whole frame is built as int.
ratings_df = pd.DataFrame(ratings_list, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = int)
# Title and Genres stay as strings; only the join key is made numeric so it
# can be matched against ratings_df['MovieID'].
movies_df = pd.DataFrame(movies_list, columns = ['MovieID', 'Title', 'Genres'])
# One vectorised conversion instead of calling pd.to_numeric once per element.
movies_df['MovieID'] = pd.to_numeric(movies_df['MovieID'])



In [2]:

    
# Preview the first rows of the movie table (MovieID, Title, Genres).
movies_df.head()









    Out[2]:







  
    
      
      MovieID
      Title
      Genres
    
  
  
    
      0
      1
      Toy Story (1995)
      Animation|Children's|Comedy
    
    
      1
      2
      Jumanji (1995)
      Adventure|Children's|Fantasy
    
    
      2
      3
      Grumpier Old Men (1995)
      Comedy|Romance
    
    
      3
      4
      Waiting to Exhale (1995)
      Comedy|Drama
    
    
      4
      5
      Father of the Bride Part II (1995)
      Comedy



In [3]:

    
# Preview the first rows of the ratings table (UserID, MovieID, Rating, Timestamp).
ratings_df.head()



In [4]:

    
# Reshape the long ratings table into a wide matrix: one row per UserID, one
# column per MovieID, cell = Rating. User/movie pairs with no rating become 0.
R_df = (
    ratings_df
    .pivot(index='UserID', columns='MovieID', values='Rating')
    .fillna(0)
)
R_df.head()









    Out[4]:







  
    
      MovieID
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      ...
      3943
      3944
      3945
      3946
      3947
      3948
      3949
      3950
      3951
      3952
    
    
      UserID
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      5.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      5
      0.0
      0.0
      0.0
      0.0
      0.0
      2.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

5 rows × 3706 columns



In [5]:

    
# Plain NumPy array of the user x movie table. DataFrame.as_matrix() was
# deprecated in pandas 0.23 and removed in 1.0; .values returns the same
# array and is available on both old and new pandas.
R = R_df.values
# Per-user mean across every movie column (the zero-filled cells are included).
user_ratings_mean = np.mean(R, axis = 1)
# reshape(-1, 1) turns the means into a column so each row is centred by its
# own user's mean via broadcasting.
R_demeaned = R - user_ratings_mean.reshape(-1, 1)



In [6]:

    
from scipy.sparse.linalg import svds
# Truncated SVD of the mean-centred rating matrix, keeping k = 50 singular
# values. U and Vt are the left/right factors; sigma comes back as a vector
# of singular values and is expanded to a diagonal matrix in a later cell.
U, sigma, Vt = svds(R_demeaned, k = 50)



In [7]:

    
# The matrix product in the next cell needs the singular values as a diagonal
# matrix rather than a vector. Guard on ndim so re-running this cell is a
# no-op: np.diag applied to the already-2-D matrix would extract its diagonal
# and turn sigma back into a vector.
if np.ndim(sigma) == 1:
    sigma = np.diag(sigma)



In [8]:

    
# Multiply the three factors back together to get the low-rank approximation
# of the centred ratings, then add each user's mean back (as a column vector)
# to return to the original rating scale.
all_user_predicted_ratings = U.dot(sigma).dot(Vt) + user_ratings_mean[:, np.newaxis]
# Columns carry the MovieIDs of R_df; rows keep the default 0-based positions.
preds_df = pd.DataFrame(data=all_user_predicted_ratings, columns=R_df.columns)



In [9]:

    
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    """Recommend the highest-predicted movies that a user has not rated yet.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per MovieID. The
        row for ``userID`` is read positionally at ``userID - 1``, and the
        column index must be named ``'MovieID'`` so it can be merged on.
    userID : int
        1-based user id.
    movies_df : pandas.DataFrame
        Movie metadata containing a ``'MovieID'`` column.
    original_ratings_df : pandas.DataFrame
        Observed ratings with ``'UserID'``, ``'MovieID'`` and ``'Rating'``
        columns.
    num_recommendations : int, optional
        Number of movies to return (default 5).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``user_full``: the user's existing ratings joined with movie metadata,
        sorted by Rating descending. ``recommendations``: the top
        ``num_recommendations`` rows of ``movies_df`` the user has not rated,
        ordered by predicted rating descending (prediction column dropped).

    Raises
    ------
    IndexError
        If ``userID`` does not map to a row of ``predictions_df``.
    """
    user_row_number = userID - 1  # UserID starts at 1, not 0
    # Without this check a userID <= 0 would wrap around through negative
    # positional indexing and silently return another user's predictions.
    if not 0 <= user_row_number < len(predictions_df):
        raise IndexError('userID {0} is out of range for a predictions table with {1} rows'
                         .format(userID, len(predictions_df)))

    # Get and sort the user's predictions.
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.UserID == userID]
    user_full = (user_data
                 .merge(movies_df, how='left', on='MovieID')
                 .sort_values(['Rating'], ascending=False))

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print('User {0} has already rated {1} movies.'.format(userID, user_full.shape[0]))
    print('Recommending the highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Name the prediction column explicitly instead of relying on the Series
    # being named after the row label it was sliced from.
    user_predictions = sorted_user_predictions.rename('Predictions').reset_index()

    # Recommend the highest predicted rating movies that the user hasn't rated yet.
    unrated_movies = movies_df[~movies_df['MovieID'].isin(user_full['MovieID'])]
    recommendations = (unrated_movies
                       .merge(user_predictions, how='left', on='MovieID')
                       .sort_values('Predictions', ascending=False)
                       # Keep the top N rows; :-1 drops the trailing Predictions column.
                       .iloc[:num_recommendations, :-1])

    return user_full, recommendations

# Ask for 10 recommendations for user 837; the first result is that user's
# existing ratings, the second the recommended movies.
already_rated, predictions = recommend_movies(preds_df, 837, movies_df, ratings_df, 10)









    



User 837 has already rated 69 movies.
Recommending the highest 10 predicted ratings movies not already rated.



In [10]:

    
# Show the first 10 rows of the user's existing ratings (sorted by Rating, highest first).
already_rated.head(10)









    Out[10]:







  
    
      
      UserID
      MovieID
      Rating
      Timestamp
      Title
      Genres
    
  
  
    
      36
      837
      858
      5
      975360036
      Godfather, The (1972)
      Action|Crime|Drama
    
    
      35
      837
      1387
      5
      975360036
      Jaws (1975)
      Action|Horror
    
    
      65
      837
      2028
      5
      975360089
      Saving Private Ryan (1998)
      Action|Drama|War
    
    
      63
      837
      1221
      5
      975360036
      Godfather: Part II, The (1974)
      Action|Crime|Drama
    
    
      11
      837
      913
      5
      975359921
      Maltese Falcon, The (1941)
      Film-Noir|Mystery
    
    
      20
      837
      3417
      5
      975360893
      Crimson Pirate, The (1952)
      Adventure|Comedy|Sci-Fi
    
    
      34
      837
      2186
      4
      975359955
      Strangers on a Train (1951)
      Film-Noir|Thriller
    
    
      55
      837
      2791
      4
      975360893
      Airplane! (1980)
      Comedy
    
    
      31
      837
      1188
      4
      975360920
      Strictly Ballroom (1992)
      Comedy|Romance
    
    
      28
      837
      1304
      4
      975360058
      Butch Cassidy and the Sundance Kid (1969)
      Action|Comedy|Western



In [11]:

    
# Display the recommended movies returned by recommend_movies.
predictions









    Out[11]:







  
    
      
      MovieID
      Title
      Genres
    
  
  
    
      516
      527
      Schindler's List (1993)
      Drama|War
    
    
      1848
      1953
      French Connection, The (1971)
      Action|Crime|Drama|Thriller
    
    
      596
      608
      Fargo (1996)
      Crime|Drama|Thriller
    
    
      1235
      1284
      Big Sleep, The (1946)
      Film-Noir|Mystery
    
    
      2085
      2194
      Untouchables, The (1987)
      Action|Crime|Drama
    
    
      1188
      1230
      Annie Hall (1977)
      Comedy|Romance
    
    
      1198
      1242
      Glory (1989)
      Action|Drama|War
    
    
      897
      922
      Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)
      Film-Noir
    
    
      1849
      1954
      Rocky (1976)
      Action|Drama
    
    
      581
      593
      Silence of the Lambs, The (1991)
      Drama|Thriller



In [ ]:

	UserID	MovieID	Rating	Timestamp
0	1	1193	5	978300760
1	1	661	3	978302109
2	1	914	3	978301968
3	1	3408	4	978300275
4	1	2355	5	978824291

	MovieID	Title	Genres
0	1	Toy Story (1995)	Animation\|Children's\|Comedy
1	2	Jumanji (1995)	Adventure\|Children's\|Fantasy
2	3	Grumpier Old Men (1995)	Comedy\|Romance
3	4	Waiting to Exhale (1995)	Comedy\|Drama
4	5	Father of the Bride Part II (1995)	Comedy

	UserID	MovieID	Rating	Timestamp	Title	Genres
36	837	858	5	975360036	Godfather, The (1972)	Action\|Crime\|Drama
35	837	1387	5	975360036	Jaws (1975)	Action\|Horror
65	837	2028	5	975360089	Saving Private Ryan (1998)	Action\|Drama\|War
63	837	1221	5	975360036	Godfather: Part II, The (1974)	Action\|Crime\|Drama
11	837	913	5	975359921	Maltese Falcon, The (1941)	Film-Noir\|Mystery
20	837	3417	5	975360893	Crimson Pirate, The (1952)	Adventure\|Comedy\|Sci-Fi
34	837	2186	4	975359955	Strangers on a Train (1951)	Film-Noir\|Thriller
55	837	2791	4	975360893	Airplane! (1980)	Comedy
31	837	1188	4	975360920	Strictly Ballroom (1992)	Comedy\|Romance
28	837	1304	4	975360058	Butch Cassidy and the Sundance Kid (1969)	Action\|Comedy\|Western

	MovieID	Title	Genres
516	527	Schindler's List (1993)	Drama\|War
1848	1953	French Connection, The (1971)	Action\|Crime\|Drama\|Thriller
596	608	Fargo (1996)	Crime\|Drama\|Thriller
1235	1284	Big Sleep, The (1946)	Film-Noir\|Mystery
2085	2194	Untouchables, The (1987)	Action\|Crime\|Drama
1188	1230	Annie Hall (1977)	Comedy\|Romance
1198	1242	Glory (1989)	Action\|Drama\|War
897	922	Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)	Film-Noir
1849	1954	Rocky (1976)	Action\|Drama
581	593	Silence of the Lambs, The (1991)	Drama\|Thriller